fix #1622: i40e memory leak

author Fabian Grünbichler <f.gruenbichler@proxmox.com>
Fri, 19 Jan 2018 11:43:16 +0000 (12:43 +0100)
committer Fabian Grünbichler <f.gruenbichler@proxmox.com>
Fri, 19 Jan 2018 11:43:16 +0000 (12:43 +0100)
diff --git a/patches/kernel/0017-i40e-Fix-memory-leak-related-filter-programming-stat.patch b/patches/kernel/0017-i40e-Fix-memory-leak-related-filter-programming-stat.patch

new file mode 100644 (file)

index 0000000..e318a18
--- /dev/null
+++ b/patches/kernel/0017-i40e-Fix-memory-leak-related-filter-programming-stat.patch
@@ -0,0 +1,127 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+Date: Wed, 4 Oct 2017 08:44:43 -0700
+Subject: [PATCH] i40e: Fix memory leak related filter programming status
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+It looks like we weren't correctly placing the pages from buffers that had
+been used to return a filter programming status back on the ring. As a
+result they were being overwritten and tracking of the pages was lost.
+
+This change works to correct that by incorporating part of
+i40e_put_rx_buffer into the programming status handler code. As a result we
+should now be correctly placing the pages for those buffers on the
+re-allocation list instead of letting them stay in place.
+
+Fixes: 0e626ff7ccbf ("i40e: Fix support for flow director programming status")
+Reported-by: Anders K. Pedersen <akp@cohaesio.com>
+Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Tested-by: Anders K Pedersen <akp@cohaesio.com>
+Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+(cherry picked from commit 2b9478ffc550f17c6cd8c69057234e91150f5972)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ drivers/net/ethernet/intel/i40e/i40e_txrx.c | 63 ++++++++++++++++-------------
+ 1 file changed, 36 insertions(+), 27 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+index 2194960d5855..391b1878c24b 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+@@ -1042,6 +1042,32 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
+       return false;
+ }
+ 
++/**
++ * i40e_reuse_rx_page - page flip buffer and store it back on the ring
++ * @rx_ring: rx descriptor ring to store buffers on
++ * @old_buff: donor buffer to have page reused
++ *
++ * Synchronizes page for reuse by the adapter
++ **/
++static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
++                             struct i40e_rx_buffer *old_buff)
++{
++      struct i40e_rx_buffer *new_buff;
++      u16 nta = rx_ring->next_to_alloc;
++
++      new_buff = &rx_ring->rx_bi[nta];
++
++      /* update, and store next to alloc */
++      nta++;
++      rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
++
++      /* transfer page from old buffer to new buffer */
++      new_buff->dma           = old_buff->dma;
++      new_buff->page          = old_buff->page;
++      new_buff->page_offset   = old_buff->page_offset;
++      new_buff->pagecnt_bias  = old_buff->pagecnt_bias;
++}
++
+ /**
+  * i40e_rx_is_programming_status - check for programming status descriptor
+  * @qw: qword representing status_error_len in CPU ordering
+@@ -1076,15 +1102,24 @@ static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
+                                         union i40e_rx_desc *rx_desc,
+                                         u64 qw)
+ {
+-      u32 ntc = rx_ring->next_to_clean + 1;
++      struct i40e_rx_buffer *rx_buffer;
++      u32 ntc = rx_ring->next_to_clean;
+       u8 id;
+ 
+       /* fetch, update, and store next to clean */
++      rx_buffer = &rx_ring->rx_bi[ntc++];
+       ntc = (ntc < rx_ring->count) ? ntc : 0;
+       rx_ring->next_to_clean = ntc;
+ 
+       prefetch(I40E_RX_DESC(rx_ring, ntc));
+ 
++      /* place unused page back on the ring */
++      i40e_reuse_rx_page(rx_ring, rx_buffer);
++      rx_ring->rx_stats.page_reuse_count++;
++
++      /* clear contents of buffer_info */
++      rx_buffer->page = NULL;
++
+       id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
+                 I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
+ 
+@@ -1643,32 +1678,6 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
+       return false;
+ }
+ 
+-/**
+- * i40e_reuse_rx_page - page flip buffer and store it back on the ring
+- * @rx_ring: rx descriptor ring to store buffers on
+- * @old_buff: donor buffer to have page reused
+- *
+- * Synchronizes page for reuse by the adapter
+- **/
+-static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
+-                             struct i40e_rx_buffer *old_buff)
+-{
+-      struct i40e_rx_buffer *new_buff;
+-      u16 nta = rx_ring->next_to_alloc;
+-
+-      new_buff = &rx_ring->rx_bi[nta];
+-
+-      /* update, and store next to alloc */
+-      nta++;
+-      rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+-
+-      /* transfer page from old buffer to new buffer */
+-      new_buff->dma           = old_buff->dma;
+-      new_buff->page          = old_buff->page;
+-      new_buff->page_offset   = old_buff->page_offset;
+-      new_buff->pagecnt_bias  = old_buff->pagecnt_bias;
+-}
+-
+ /**
+  * i40e_page_is_reusable - check if any reuse is possible
+  * @page: page struct to check
+-- 
+2.14.2
+
diff --git a/patches/kernel/0017-x86-mm-Add-the-nopcid-boot-option-to-turn-off-PCID.patch b/patches/kernel/0017-x86-mm-Add-the-nopcid-boot-option-to-turn-off-PCID.patch

deleted file mode 100644 (file)

index bc566a5..0000000
--- a/patches/kernel/0017-x86-mm-Add-the-nopcid-boot-option-to-turn-off-PCID.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 29 Jun 2017 08:53:20 -0700
-Subject: [PATCH] x86/mm: Add the 'nopcid' boot option to turn off PCID
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The parameter is only present on x86_64 systems to save a few bytes,
-as PCID is always disabled on x86_32.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Arjan van de Ven <arjan@linux.intel.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: linux-mm@kvack.org
-Link: http://lkml.kernel.org/r/8bbb2e65bcd249a5f18bfb8128b4689f08ac2b60.1498751203.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 0790c9aad84901ca1bdc14746175549c8b5da215)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 62d3a63645c17611fe8ccc0c5adc5e840d9cff7b)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/admin-guide/kernel-parameters.txt |  2 ++
- arch/x86/kernel/cpu/common.c                    | 18 ++++++++++++++++++
- 2 files changed, 20 insertions(+)
-
-diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
-index 73fd6abac39b..3510e255ef4c 100644
---- a/Documentation/admin-guide/kernel-parameters.txt
-+++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -2700,6 +2700,8 @@
-       nopat           [X86] Disable PAT (page attribute table extension of
-                       pagetables) support.
- 
-+      nopcid          [X86-64] Disable the PCID cpu feature.
-+
-       norandmaps      Don't use address space randomization.  Equivalent to
-                       echo 0 > /proc/sys/kernel/randomize_va_space
- 
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index c8b39870f33e..904485e7b230 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -168,6 +168,24 @@ static int __init x86_mpx_setup(char *s)
- }
- __setup("nompx", x86_mpx_setup);
- 
-+#ifdef CONFIG_X86_64
-+static int __init x86_pcid_setup(char *s)
-+{
-+      /* require an exact match without trailing characters */
-+      if (strlen(s))
-+              return 0;
-+
-+      /* do not emit a message if the feature is not present */
-+      if (!boot_cpu_has(X86_FEATURE_PCID))
-+              return 1;
-+
-+      setup_clear_cpu_cap(X86_FEATURE_PCID);
-+      pr_info("nopcid: PCID feature disabled\n");
-+      return 1;
-+}
-+__setup("nopcid", x86_pcid_setup);
-+#endif
-+
- static int __init x86_noinvpcid_setup(char *s)
- {
-       /* noinvpcid doesn't accept parameters */
--- 
-2.14.2
-
diff --git a/patches/kernel/0018-x86-mm-Add-the-nopcid-boot-option-to-turn-off-PCID.patch b/patches/kernel/0018-x86-mm-Add-the-nopcid-boot-option-to-turn-off-PCID.patch

new file mode 100644 (file)

index 0000000..bc566a5
--- /dev/null
+++ b/patches/kernel/0018-x86-mm-Add-the-nopcid-boot-option-to-turn-off-PCID.patch
@@ -0,0 +1,83 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 29 Jun 2017 08:53:20 -0700
+Subject: [PATCH] x86/mm: Add the 'nopcid' boot option to turn off PCID
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The parameter is only present on x86_64 systems to save a few bytes,
+as PCID is always disabled on x86_32.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/8bbb2e65bcd249a5f18bfb8128b4689f08ac2b60.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 0790c9aad84901ca1bdc14746175549c8b5da215)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 62d3a63645c17611fe8ccc0c5adc5e840d9cff7b)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/admin-guide/kernel-parameters.txt |  2 ++
+ arch/x86/kernel/cpu/common.c                    | 18 ++++++++++++++++++
+ 2 files changed, 20 insertions(+)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 73fd6abac39b..3510e255ef4c 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -2700,6 +2700,8 @@
+       nopat           [X86] Disable PAT (page attribute table extension of
+                       pagetables) support.
+ 
++      nopcid          [X86-64] Disable the PCID cpu feature.
++
+       norandmaps      Don't use address space randomization.  Equivalent to
+                       echo 0 > /proc/sys/kernel/randomize_va_space
+ 
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index c8b39870f33e..904485e7b230 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -168,6 +168,24 @@ static int __init x86_mpx_setup(char *s)
+ }
+ __setup("nompx", x86_mpx_setup);
+ 
++#ifdef CONFIG_X86_64
++static int __init x86_pcid_setup(char *s)
++{
++      /* require an exact match without trailing characters */
++      if (strlen(s))
++              return 0;
++
++      /* do not emit a message if the feature is not present */
++      if (!boot_cpu_has(X86_FEATURE_PCID))
++              return 1;
++
++      setup_clear_cpu_cap(X86_FEATURE_PCID);
++      pr_info("nopcid: PCID feature disabled\n");
++      return 1;
++}
++__setup("nopcid", x86_pcid_setup);
++#endif
++
+ static int __init x86_noinvpcid_setup(char *s)
+ {
+       /* noinvpcid doesn't accept parameters */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0018-x86-mm-Enable-CR4.PCIDE-on-supported-systems.patch b/patches/kernel/0018-x86-mm-Enable-CR4.PCIDE-on-supported-systems.patch

deleted file mode 100644 (file)

index a718862..0000000
--- a/patches/kernel/0018-x86-mm-Enable-CR4.PCIDE-on-supported-systems.patch
+++ /dev/null
@@ -1,120 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 29 Jun 2017 08:53:21 -0700
-Subject: [PATCH] x86/mm: Enable CR4.PCIDE on supported systems
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-We can use PCID if the CPU has PCID and PGE and we're not on Xen.
-
-By itself, this has no effect. A followup patch will start using PCID.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
-Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Arjan van de Ven <arjan@linux.intel.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: linux-mm@kvack.org
-Link: http://lkml.kernel.org/r/6327ecd907b32f79d5aa0d466f04503bbec5df88.1498751203.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 660da7c9228f685b2ebe664f9fd69aaddcc420b5)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 7d6bbe5528395f18de50bd2532843546c849883d)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/tlbflush.h |  8 ++++++++
- arch/x86/kernel/cpu/common.c    | 22 ++++++++++++++++++++++
- arch/x86/xen/enlighten_pv.c     |  6 ++++++
- 3 files changed, 36 insertions(+)
-
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index 50ea3482e1d1..2b3d68093235 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -207,6 +207,14 @@ static inline void __flush_tlb_all(void)
-               __flush_tlb_global();
-       else
-               __flush_tlb();
-+
-+      /*
-+       * Note: if we somehow had PCID but not PGE, then this wouldn't work --
-+       * we'd end up flushing kernel translations for the current ASID but
-+       * we might fail to flush kernel translations for other cached ASIDs.
-+       *
-+       * To avoid this issue, we force PCID off if PGE is off.
-+       */
- }
- 
- static inline void __flush_tlb_one(unsigned long addr)
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 904485e7b230..b95cd94ca97b 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -329,6 +329,25 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
-       }
- }
- 
-+static void setup_pcid(struct cpuinfo_x86 *c)
-+{
-+      if (cpu_has(c, X86_FEATURE_PCID)) {
-+              if (cpu_has(c, X86_FEATURE_PGE)) {
-+                      cr4_set_bits(X86_CR4_PCIDE);
-+              } else {
-+                      /*
-+                       * flush_tlb_all(), as currently implemented, won't
-+                       * work if PCID is on but PGE is not.  Since that
-+                       * combination doesn't exist on real hardware, there's
-+                       * no reason to try to fully support it, but it's
-+                       * polite to avoid corrupting data if we're on
-+                       * an improperly configured VM.
-+                       */
-+                      clear_cpu_cap(c, X86_FEATURE_PCID);
-+              }
-+      }
-+}
-+
- /*
-  * Protection Keys are not available in 32-bit mode.
-  */
-@@ -1143,6 +1162,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
-       setup_smep(c);
-       setup_smap(c);
- 
-+      /* Set up PCID */
-+      setup_pcid(c);
-+
-       /*
-        * The vendor-specific functions might have changed features.
-        * Now we do "generic changes."
-diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
-index 811e4ddb3f37..290bc5ac9852 100644
---- a/arch/x86/xen/enlighten_pv.c
-+++ b/arch/x86/xen/enlighten_pv.c
-@@ -264,6 +264,12 @@ static void __init xen_init_capabilities(void)
-       setup_clear_cpu_cap(X86_FEATURE_ACC);
-       setup_clear_cpu_cap(X86_FEATURE_X2APIC);
- 
-+      /*
-+       * Xen PV would need some work to support PCID: CR3 handling as well
-+       * as xen_flush_tlb_others() would need updating.
-+       */
-+      setup_clear_cpu_cap(X86_FEATURE_PCID);
-+
-       if (!xen_initial_domain())
-               setup_clear_cpu_cap(X86_FEATURE_ACPI);
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0019-x86-mm-Document-how-CR4.PCIDE-restore-works.patch b/patches/kernel/0019-x86-mm-Document-how-CR4.PCIDE-restore-works.patch

deleted file mode 100644 (file)

index 03ccd7a..0000000
--- a/patches/kernel/0019-x86-mm-Document-how-CR4.PCIDE-restore-works.patch
+++ /dev/null
@@ -1,54 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Wed, 6 Sep 2017 19:54:54 -0700
-Subject: [PATCH] x86/mm: Document how CR4.PCIDE restore works
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-While debugging a problem, I thought that using
-cr4_set_bits_and_update_boot() to restore CR4.PCIDE would be
-helpful.  It turns out to be counterproductive.
-
-Add a comment documenting how this works.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-(cherry picked from commit 1c9fe4409ce3e9c78b1ed96ee8ed699d4f03bf33)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 0d69e4c4a2db42a9bac6609a3df15bd91163f8b9)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/cpu/common.c | 13 +++++++++++++
- 1 file changed, 13 insertions(+)
-
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index b95cd94ca97b..0b80ed14ff52 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -333,6 +333,19 @@ static void setup_pcid(struct cpuinfo_x86 *c)
- {
-       if (cpu_has(c, X86_FEATURE_PCID)) {
-               if (cpu_has(c, X86_FEATURE_PGE)) {
-+                      /*
-+                       * We'd like to use cr4_set_bits_and_update_boot(),
-+                       * but we can't.  CR4.PCIDE is special and can only
-+                       * be set in long mode, and the early CPU init code
-+                       * doesn't know this and would try to restore CR4.PCIDE
-+                       * prior to entering long mode.
-+                       *
-+                       * Instead, we rely on the fact that hotplug, resume,
-+                       * etc all fully restore CR4 before they write anything
-+                       * that could have nonzero PCID bits to CR3.  CR4.PCIDE
-+                       * has no effect on the page tables themselves, so we
-+                       * don't need it to be restored early.
-+                       */
-                       cr4_set_bits(X86_CR4_PCIDE);
-               } else {
-                       /*
--- 
-2.14.2
-
diff --git a/patches/kernel/0019-x86-mm-Enable-CR4.PCIDE-on-supported-systems.patch b/patches/kernel/0019-x86-mm-Enable-CR4.PCIDE-on-supported-systems.patch

new file mode 100644 (file)

index 0000000..a718862
--- /dev/null
+++ b/patches/kernel/0019-x86-mm-Enable-CR4.PCIDE-on-supported-systems.patch
@@ -0,0 +1,120 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 29 Jun 2017 08:53:21 -0700
+Subject: [PATCH] x86/mm: Enable CR4.PCIDE on supported systems
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+We can use PCID if the CPU has PCID and PGE and we're not on Xen.
+
+By itself, this has no effect. A followup patch will start using PCID.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/6327ecd907b32f79d5aa0d466f04503bbec5df88.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 660da7c9228f685b2ebe664f9fd69aaddcc420b5)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 7d6bbe5528395f18de50bd2532843546c849883d)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/tlbflush.h |  8 ++++++++
+ arch/x86/kernel/cpu/common.c    | 22 ++++++++++++++++++++++
+ arch/x86/xen/enlighten_pv.c     |  6 ++++++
+ 3 files changed, 36 insertions(+)
+
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 50ea3482e1d1..2b3d68093235 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -207,6 +207,14 @@ static inline void __flush_tlb_all(void)
+               __flush_tlb_global();
+       else
+               __flush_tlb();
++
++      /*
++       * Note: if we somehow had PCID but not PGE, then this wouldn't work --
++       * we'd end up flushing kernel translations for the current ASID but
++       * we might fail to flush kernel translations for other cached ASIDs.
++       *
++       * To avoid this issue, we force PCID off if PGE is off.
++       */
+ }
+ 
+ static inline void __flush_tlb_one(unsigned long addr)
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 904485e7b230..b95cd94ca97b 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -329,6 +329,25 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
+       }
+ }
+ 
++static void setup_pcid(struct cpuinfo_x86 *c)
++{
++      if (cpu_has(c, X86_FEATURE_PCID)) {
++              if (cpu_has(c, X86_FEATURE_PGE)) {
++                      cr4_set_bits(X86_CR4_PCIDE);
++              } else {
++                      /*
++                       * flush_tlb_all(), as currently implemented, won't
++                       * work if PCID is on but PGE is not.  Since that
++                       * combination doesn't exist on real hardware, there's
++                       * no reason to try to fully support it, but it's
++                       * polite to avoid corrupting data if we're on
++                       * an improperly configured VM.
++                       */
++                      clear_cpu_cap(c, X86_FEATURE_PCID);
++              }
++      }
++}
++
+ /*
+  * Protection Keys are not available in 32-bit mode.
+  */
+@@ -1143,6 +1162,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
+       setup_smep(c);
+       setup_smap(c);
+ 
++      /* Set up PCID */
++      setup_pcid(c);
++
+       /*
+        * The vendor-specific functions might have changed features.
+        * Now we do "generic changes."
+diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
+index 811e4ddb3f37..290bc5ac9852 100644
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -264,6 +264,12 @@ static void __init xen_init_capabilities(void)
+       setup_clear_cpu_cap(X86_FEATURE_ACC);
+       setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+ 
++      /*
++       * Xen PV would need some work to support PCID: CR3 handling as well
++       * as xen_flush_tlb_others() would need updating.
++       */
++      setup_clear_cpu_cap(X86_FEATURE_PCID);
++
+       if (!xen_initial_domain())
+               setup_clear_cpu_cap(X86_FEATURE_ACPI);
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0020-x86-entry-64-Refactor-IRQ-stacks-and-make-them-NMI-s.patch b/patches/kernel/0020-x86-entry-64-Refactor-IRQ-stacks-and-make-them-NMI-s.patch

deleted file mode 100644 (file)

index edabecd..0000000
--- a/patches/kernel/0020-x86-entry-64-Refactor-IRQ-stacks-and-make-them-NMI-s.patch
+++ /dev/null
@@ -1,201 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Tue, 11 Jul 2017 10:33:38 -0500
-Subject: [PATCH] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This will allow IRQ stacks to nest inside NMIs or similar entries
-that can happen during IRQ stack setup or teardown.
-
-The new macros won't work correctly if they're invoked with IRQs on.
-Add a check under CONFIG_DEBUG_ENTRY to detect that.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-[ Use %r10 instead of %r11 in xen_do_hypervisor_callback to make objtool
-  and ORC unwinder's lives a little easier. ]
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mike Galbraith <efault@gmx.de>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: live-patching@vger.kernel.org
-Link: http://lkml.kernel.org/r/b0b2ff5fb97d2da2e1d7e1f380190c92545c8bb5.1499786555.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 1d3e53e8624a3ec85f4041ca6d973da7c1575938)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit be58b042e135d0ee777a54798f33015857d7e2e0)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/process_64.c |  3 ++
- arch/x86/Kconfig.debug       |  2 --
- arch/x86/entry/entry_64.S    | 85 +++++++++++++++++++++++++++++++-------------
- 3 files changed, 64 insertions(+), 26 deletions(-)
-
-diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
-index fe56e6f93cbb..1e7701c4cd80 100644
---- a/arch/x86/kernel/process_64.c
-+++ b/arch/x86/kernel/process_64.c
-@@ -404,6 +404,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
-       int cpu = smp_processor_id();
-       struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
- 
-+      WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
-+                   this_cpu_read(irq_count) != -1);
-+
-       switch_fpu_prepare(prev_fpu, cpu);
- 
-       /* We must save %fs and %gs before load_TLS() because
-diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
-index cd20ca0b4043..1fc519f3c49e 100644
---- a/arch/x86/Kconfig.debug
-+++ b/arch/x86/Kconfig.debug
-@@ -305,8 +305,6 @@ config DEBUG_ENTRY
-         Some of these sanity checks may slow down kernel entries and
-         exits or otherwise impact performance.
- 
--        This is currently used to help test NMI code.
--
-         If unsure, say N.
- 
- config DEBUG_NMI_SELFTEST
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 6d078b89a5e8..07b4056af8a8 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -447,6 +447,59 @@ ENTRY(irq_entries_start)
-     .endr
- END(irq_entries_start)
- 
-+.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
-+#ifdef CONFIG_DEBUG_ENTRY
-+      pushfq
-+      testl $X86_EFLAGS_IF, (%rsp)
-+      jz .Lokay_\@
-+      ud2
-+.Lokay_\@:
-+      addq $8, %rsp
-+#endif
-+.endm
-+
-+/*
-+ * Enters the IRQ stack if we're not already using it.  NMI-safe.  Clobbers
-+ * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
-+ * Requires kernel GSBASE.
-+ *
-+ * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
-+ */
-+.macro ENTER_IRQ_STACK old_rsp
-+      DEBUG_ENTRY_ASSERT_IRQS_OFF
-+      movq    %rsp, \old_rsp
-+      incl    PER_CPU_VAR(irq_count)
-+
-+      /*
-+       * Right now, if we just incremented irq_count to zero, we've
-+       * claimed the IRQ stack but we haven't switched to it yet.
-+       *
-+       * If anything is added that can interrupt us here without using IST,
-+       * it must be *extremely* careful to limit its stack usage.  This
-+       * could include kprobes and a hypothetical future IST-less #DB
-+       * handler.
-+       */
-+
-+      cmovzq  PER_CPU_VAR(irq_stack_ptr), %rsp
-+      pushq   \old_rsp
-+.endm
-+
-+/*
-+ * Undoes ENTER_IRQ_STACK.
-+ */
-+.macro LEAVE_IRQ_STACK
-+      DEBUG_ENTRY_ASSERT_IRQS_OFF
-+      /* We need to be off the IRQ stack before decrementing irq_count. */
-+      popq    %rsp
-+
-+      /*
-+       * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
-+       * the irq stack but we're not on it.
-+       */
-+
-+      decl    PER_CPU_VAR(irq_count)
-+.endm
-+
- /*
-  * Interrupt entry/exit.
-  *
-@@ -485,17 +538,7 @@ END(irq_entries_start)
-       CALL_enter_from_user_mode
- 
- 1:
--      /*
--       * Save previous stack pointer, optionally switch to interrupt stack.
--       * irq_count is used to check if a CPU is already on an interrupt stack
--       * or not. While this is essentially redundant with preempt_count it is
--       * a little cheaper to use a separate counter in the PDA (short of
--       * moving irq_enter into assembly, which would be too much work)
--       */
--      movq    %rsp, %rdi
--      incl    PER_CPU_VAR(irq_count)
--      cmovzq  PER_CPU_VAR(irq_stack_ptr), %rsp
--      pushq   %rdi
-+      ENTER_IRQ_STACK old_rsp=%rdi
-       /* We entered an interrupt context - irqs are off: */
-       TRACE_IRQS_OFF
- 
-@@ -515,10 +558,8 @@ common_interrupt:
- ret_from_intr:
-       DISABLE_INTERRUPTS(CLBR_ANY)
-       TRACE_IRQS_OFF
--      decl    PER_CPU_VAR(irq_count)
- 
--      /* Restore saved previous stack */
--      popq    %rsp
-+      LEAVE_IRQ_STACK
- 
-       testb   $3, CS(%rsp)
-       jz      retint_kernel
-@@ -892,12 +933,10 @@ bad_gs:
- ENTRY(do_softirq_own_stack)
-       pushq   %rbp
-       mov     %rsp, %rbp
--      incl    PER_CPU_VAR(irq_count)
--      cmove   PER_CPU_VAR(irq_stack_ptr), %rsp
--      push    %rbp                            /* frame pointer backlink */
-+      ENTER_IRQ_STACK old_rsp=%r11
-       call    __do_softirq
-+      LEAVE_IRQ_STACK
-       leaveq
--      decl    PER_CPU_VAR(irq_count)
-       ret
- END(do_softirq_own_stack)
- 
-@@ -924,13 +963,11 @@ ENTRY(xen_do_hypervisor_callback)                /* do_hypervisor_callback(struct *pt_regs) */
-  * see the correct pointer to the pt_regs
-  */
-       movq    %rdi, %rsp                      /* we don't return, adjust the stack frame */
--11:   incl    PER_CPU_VAR(irq_count)
--      movq    %rsp, %rbp
--      cmovzq  PER_CPU_VAR(irq_stack_ptr), %rsp
--      pushq   %rbp                            /* frame pointer backlink */
-+
-+      ENTER_IRQ_STACK old_rsp=%r10
-       call    xen_evtchn_do_upcall
--      popq    %rsp
--      decl    PER_CPU_VAR(irq_count)
-+      LEAVE_IRQ_STACK
-+
- #ifndef CONFIG_PREEMPT
-       call    xen_maybe_preempt_hcall
- #endif
--- 
-2.14.2
-
diff --git a/patches/kernel/0020-x86-mm-Document-how-CR4.PCIDE-restore-works.patch b/patches/kernel/0020-x86-mm-Document-how-CR4.PCIDE-restore-works.patch

new file mode 100644 (file)

index 0000000..03ccd7a
--- /dev/null
+++ b/patches/kernel/0020-x86-mm-Document-how-CR4.PCIDE-restore-works.patch
@@ -0,0 +1,54 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Wed, 6 Sep 2017 19:54:54 -0700
+Subject: [PATCH] x86/mm: Document how CR4.PCIDE restore works
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+While debugging a problem, I thought that using
+cr4_set_bits_and_update_boot() to restore CR4.PCIDE would be
+helpful.  It turns out to be counterproductive.
+
+Add a comment documenting how this works.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+(cherry picked from commit 1c9fe4409ce3e9c78b1ed96ee8ed699d4f03bf33)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 0d69e4c4a2db42a9bac6609a3df15bd91163f8b9)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/cpu/common.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index b95cd94ca97b..0b80ed14ff52 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -333,6 +333,19 @@ static void setup_pcid(struct cpuinfo_x86 *c)
+ {
+       if (cpu_has(c, X86_FEATURE_PCID)) {
+               if (cpu_has(c, X86_FEATURE_PGE)) {
++                      /*
++                       * We'd like to use cr4_set_bits_and_update_boot(),
++                       * but we can't.  CR4.PCIDE is special and can only
++                       * be set in long mode, and the early CPU init code
++                       * doesn't know this and would try to restore CR4.PCIDE
++                       * prior to entering long mode.
++                       *
++                       * Instead, we rely on the fact that hotplug, resume,
++                       * etc all fully restore CR4 before they write anything
++                       * that could have nonzero PCID bits to CR3.  CR4.PCIDE
++                       * has no effect on the page tables themselves, so we
++                       * don't need it to be restored early.
++                       */
+                       cr4_set_bits(X86_CR4_PCIDE);
+               } else {
+                       /*
+-- 
+2.14.2
+
diff --git a/patches/kernel/0021-x86-entry-64-Initialize-the-top-of-the-IRQ-stack-bef.patch b/patches/kernel/0021-x86-entry-64-Initialize-the-top-of-the-IRQ-stack-bef.patch

deleted file mode 100644 (file)

index f50fc39..0000000
--- a/patches/kernel/0021-x86-entry-64-Initialize-the-top-of-the-IRQ-stack-bef.patch
+++ /dev/null
@@ -1,94 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Tue, 11 Jul 2017 10:33:39 -0500
-Subject: [PATCH] x86/entry/64: Initialize the top of the IRQ stack before
- switching stacks
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The OOPS unwinder wants the word at the top of the IRQ stack to
-point back to the previous stack at all times when the IRQ stack
-is in use.  There's currently a one-instruction window in ENTER_IRQ_STACK
-during which this isn't the case.  Fix it by writing the old RSP to the
-top of the IRQ stack before jumping.
-
-This currently writes the pointer to the stack twice, which is a bit
-ugly.  We could get rid of this by replacing irq_stack_ptr with
-irq_stack_ptr_minus_eight (better name welcome).  OTOH, there may be
-all kinds of odd microarchitectural considerations in play that
-affect performance by a few cycles here.
-
-Reported-by: Mike Galbraith <efault@gmx.de>
-Reported-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: live-patching@vger.kernel.org
-Link: http://lkml.kernel.org/r/aae7e79e49914808440ad5310ace138ced2179ca.1499786555.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 2995590964da93e1fd9a91550f9c9d9fab28f160)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit a753ff654dfd07a7f8d6f39a27126589eac7e55f)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S | 24 +++++++++++++++++++++++-
- 1 file changed, 23 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 07b4056af8a8..184b70712545 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -469,6 +469,7 @@ END(irq_entries_start)
-       DEBUG_ENTRY_ASSERT_IRQS_OFF
-       movq    %rsp, \old_rsp
-       incl    PER_CPU_VAR(irq_count)
-+      jnz     .Lirq_stack_push_old_rsp_\@
- 
-       /*
-        * Right now, if we just incremented irq_count to zero, we've
-@@ -478,9 +479,30 @@ END(irq_entries_start)
-        * it must be *extremely* careful to limit its stack usage.  This
-        * could include kprobes and a hypothetical future IST-less #DB
-        * handler.
-+       *
-+       * The OOPS unwinder relies on the word at the top of the IRQ
-+       * stack linking back to the previous RSP for the entire time we're
-+       * on the IRQ stack.  For this to work reliably, we need to write
-+       * it before we actually move ourselves to the IRQ stack.
-+       */
-+
-+      movq    \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
-+      movq    PER_CPU_VAR(irq_stack_ptr), %rsp
-+
-+#ifdef CONFIG_DEBUG_ENTRY
-+      /*
-+       * If the first movq above becomes wrong due to IRQ stack layout
-+       * changes, the only way we'll notice is if we try to unwind right
-+       * here.  Assert that we set up the stack right to catch this type
-+       * of bug quickly.
-        */
-+      cmpq    -8(%rsp), \old_rsp
-+      je      .Lirq_stack_okay\@
-+      ud2
-+      .Lirq_stack_okay\@:
-+#endif
- 
--      cmovzq  PER_CPU_VAR(irq_stack_ptr), %rsp
-+.Lirq_stack_push_old_rsp_\@:
-       pushq   \old_rsp
- .endm
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0021-x86-entry-64-Refactor-IRQ-stacks-and-make-them-NMI-s.patch b/patches/kernel/0021-x86-entry-64-Refactor-IRQ-stacks-and-make-them-NMI-s.patch

new file mode 100644 (file)

index 0000000..edabecd
--- /dev/null
+++ b/patches/kernel/0021-x86-entry-64-Refactor-IRQ-stacks-and-make-them-NMI-s.patch
@@ -0,0 +1,201 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 11 Jul 2017 10:33:38 -0500
+Subject: [PATCH] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This will allow IRQ stacks to nest inside NMIs or similar entries
+that can happen during IRQ stack setup or teardown.
+
+The new macros won't work correctly if they're invoked with IRQs on.
+Add a check under CONFIG_DEBUG_ENTRY to detect that.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+[ Use %r10 instead of %r11 in xen_do_hypervisor_callback to make objtool
+  and ORC unwinder's lives a little easier. ]
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: live-patching@vger.kernel.org
+Link: http://lkml.kernel.org/r/b0b2ff5fb97d2da2e1d7e1f380190c92545c8bb5.1499786555.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 1d3e53e8624a3ec85f4041ca6d973da7c1575938)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit be58b042e135d0ee777a54798f33015857d7e2e0)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/process_64.c |  3 ++
+ arch/x86/Kconfig.debug       |  2 --
+ arch/x86/entry/entry_64.S    | 85 +++++++++++++++++++++++++++++++-------------
+ 3 files changed, 64 insertions(+), 26 deletions(-)
+
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index fe56e6f93cbb..1e7701c4cd80 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -404,6 +404,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+       int cpu = smp_processor_id();
+       struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
+ 
++      WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
++                   this_cpu_read(irq_count) != -1);
++
+       switch_fpu_prepare(prev_fpu, cpu);
+ 
+       /* We must save %fs and %gs before load_TLS() because
+diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
+index cd20ca0b4043..1fc519f3c49e 100644
+--- a/arch/x86/Kconfig.debug
++++ b/arch/x86/Kconfig.debug
+@@ -305,8 +305,6 @@ config DEBUG_ENTRY
+         Some of these sanity checks may slow down kernel entries and
+         exits or otherwise impact performance.
+ 
+-        This is currently used to help test NMI code.
+-
+         If unsure, say N.
+ 
+ config DEBUG_NMI_SELFTEST
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 6d078b89a5e8..07b4056af8a8 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -447,6 +447,59 @@ ENTRY(irq_entries_start)
+     .endr
+ END(irq_entries_start)
+ 
++.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
++#ifdef CONFIG_DEBUG_ENTRY
++      pushfq
++      testl $X86_EFLAGS_IF, (%rsp)
++      jz .Lokay_\@
++      ud2
++.Lokay_\@:
++      addq $8, %rsp
++#endif
++.endm
++
++/*
++ * Enters the IRQ stack if we're not already using it.  NMI-safe.  Clobbers
++ * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
++ * Requires kernel GSBASE.
++ *
++ * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
++ */
++.macro ENTER_IRQ_STACK old_rsp
++      DEBUG_ENTRY_ASSERT_IRQS_OFF
++      movq    %rsp, \old_rsp
++      incl    PER_CPU_VAR(irq_count)
++
++      /*
++       * Right now, if we just incremented irq_count to zero, we've
++       * claimed the IRQ stack but we haven't switched to it yet.
++       *
++       * If anything is added that can interrupt us here without using IST,
++       * it must be *extremely* careful to limit its stack usage.  This
++       * could include kprobes and a hypothetical future IST-less #DB
++       * handler.
++       */
++
++      cmovzq  PER_CPU_VAR(irq_stack_ptr), %rsp
++      pushq   \old_rsp
++.endm
++
++/*
++ * Undoes ENTER_IRQ_STACK.
++ */
++.macro LEAVE_IRQ_STACK
++      DEBUG_ENTRY_ASSERT_IRQS_OFF
++      /* We need to be off the IRQ stack before decrementing irq_count. */
++      popq    %rsp
++
++      /*
++       * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
++       * the irq stack but we're not on it.
++       */
++
++      decl    PER_CPU_VAR(irq_count)
++.endm
++
+ /*
+  * Interrupt entry/exit.
+  *
+@@ -485,17 +538,7 @@ END(irq_entries_start)
+       CALL_enter_from_user_mode
+ 
+ 1:
+-      /*
+-       * Save previous stack pointer, optionally switch to interrupt stack.
+-       * irq_count is used to check if a CPU is already on an interrupt stack
+-       * or not. While this is essentially redundant with preempt_count it is
+-       * a little cheaper to use a separate counter in the PDA (short of
+-       * moving irq_enter into assembly, which would be too much work)
+-       */
+-      movq    %rsp, %rdi
+-      incl    PER_CPU_VAR(irq_count)
+-      cmovzq  PER_CPU_VAR(irq_stack_ptr), %rsp
+-      pushq   %rdi
++      ENTER_IRQ_STACK old_rsp=%rdi
+       /* We entered an interrupt context - irqs are off: */
+       TRACE_IRQS_OFF
+ 
+@@ -515,10 +558,8 @@ common_interrupt:
+ ret_from_intr:
+       DISABLE_INTERRUPTS(CLBR_ANY)
+       TRACE_IRQS_OFF
+-      decl    PER_CPU_VAR(irq_count)
+ 
+-      /* Restore saved previous stack */
+-      popq    %rsp
++      LEAVE_IRQ_STACK
+ 
+       testb   $3, CS(%rsp)
+       jz      retint_kernel
+@@ -892,12 +933,10 @@ bad_gs:
+ ENTRY(do_softirq_own_stack)
+       pushq   %rbp
+       mov     %rsp, %rbp
+-      incl    PER_CPU_VAR(irq_count)
+-      cmove   PER_CPU_VAR(irq_stack_ptr), %rsp
+-      push    %rbp                            /* frame pointer backlink */
++      ENTER_IRQ_STACK old_rsp=%r11
+       call    __do_softirq
++      LEAVE_IRQ_STACK
+       leaveq
+-      decl    PER_CPU_VAR(irq_count)
+       ret
+ END(do_softirq_own_stack)
+ 
+@@ -924,13 +963,11 @@ ENTRY(xen_do_hypervisor_callback)                /* do_hypervisor_callback(struct *pt_regs) */
+  * see the correct pointer to the pt_regs
+  */
+       movq    %rdi, %rsp                      /* we don't return, adjust the stack frame */
+-11:   incl    PER_CPU_VAR(irq_count)
+-      movq    %rsp, %rbp
+-      cmovzq  PER_CPU_VAR(irq_stack_ptr), %rsp
+-      pushq   %rbp                            /* frame pointer backlink */
++
++      ENTER_IRQ_STACK old_rsp=%r10
+       call    xen_evtchn_do_upcall
+-      popq    %rsp
+-      decl    PER_CPU_VAR(irq_count)
++      LEAVE_IRQ_STACK
++
+ #ifndef CONFIG_PREEMPT
+       call    xen_maybe_preempt_hcall
+ #endif
+-- 
+2.14.2
+
diff --git a/patches/kernel/0022-x86-entry-64-Add-unwind-hint-annotations.patch b/patches/kernel/0022-x86-entry-64-Add-unwind-hint-annotations.patch

deleted file mode 100644 (file)

index 428fcf3..0000000
--- a/patches/kernel/0022-x86-entry-64-Add-unwind-hint-annotations.patch
+++ /dev/null
@@ -1,463 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Tue, 11 Jul 2017 10:33:44 -0500
-Subject: [PATCH] x86/entry/64: Add unwind hint annotations
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Add unwind hint annotations to entry_64.S.  This will enable the ORC
-unwinder to unwind through any location in the entry code including
-syscalls, interrupts, and exceptions.
-
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mike Galbraith <efault@gmx.de>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: live-patching@vger.kernel.org
-Link: http://lkml.kernel.org/r/b9f6d478aadf68ba57c739dcfac34ec0dc021c4c.1499786555.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 8c1f75587a18ca032da8f6376d1ed882d7095289)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit a8448e6971c1e71b22c651131d14f8be76e6d399)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/Makefile   |  1 -
- arch/x86/entry/calling.h  |  5 ++++
- arch/x86/entry/entry_64.S | 71 ++++++++++++++++++++++++++++++++++++++++-------
- 3 files changed, 66 insertions(+), 11 deletions(-)
-
-diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
-index 9976fcecd17e..af28a8a24366 100644
---- a/arch/x86/entry/Makefile
-+++ b/arch/x86/entry/Makefile
-@@ -2,7 +2,6 @@
- # Makefile for the x86 low level entry code
- #
- 
--OBJECT_FILES_NON_STANDARD_entry_$(BITS).o   := y
- OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
- 
- CFLAGS_syscall_64.o           += $(call cc-option,-Wno-override-init,)
-diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
-index 05ed3d393da7..640aafebdc00 100644
---- a/arch/x86/entry/calling.h
-+++ b/arch/x86/entry/calling.h
-@@ -1,4 +1,5 @@
- #include <linux/jump_label.h>
-+#include <asm/unwind_hints.h>
- 
- /*
- 
-@@ -112,6 +113,7 @@ For 32-bit we have the following conventions - kernel is built with
-       movq %rdx, 12*8+\offset(%rsp)
-       movq %rsi, 13*8+\offset(%rsp)
-       movq %rdi, 14*8+\offset(%rsp)
-+      UNWIND_HINT_REGS offset=\offset extra=0
-       .endm
-       .macro SAVE_C_REGS offset=0
-       SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
-@@ -136,6 +138,7 @@ For 32-bit we have the following conventions - kernel is built with
-       movq %r12, 3*8+\offset(%rsp)
-       movq %rbp, 4*8+\offset(%rsp)
-       movq %rbx, 5*8+\offset(%rsp)
-+      UNWIND_HINT_REGS offset=\offset
-       .endm
- 
-       .macro RESTORE_EXTRA_REGS offset=0
-@@ -145,6 +148,7 @@ For 32-bit we have the following conventions - kernel is built with
-       movq 3*8+\offset(%rsp), %r12
-       movq 4*8+\offset(%rsp), %rbp
-       movq 5*8+\offset(%rsp), %rbx
-+      UNWIND_HINT_REGS offset=\offset extra=0
-       .endm
- 
-       .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
-@@ -167,6 +171,7 @@ For 32-bit we have the following conventions - kernel is built with
-       .endif
-       movq 13*8(%rsp), %rsi
-       movq 14*8(%rsp), %rdi
-+      UNWIND_HINT_IRET_REGS offset=16*8
-       .endm
-       .macro RESTORE_C_REGS
-       RESTORE_C_REGS_HELPER 1,1,1,1,1
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 184b70712545..64b233ab7cad 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -36,6 +36,7 @@
- #include <asm/smap.h>
- #include <asm/pgtable_types.h>
- #include <asm/export.h>
-+#include <asm/frame.h>
- #include <linux/err.h>
- 
- .code64
-@@ -43,9 +44,10 @@
- 
- #ifdef CONFIG_PARAVIRT
- ENTRY(native_usergs_sysret64)
-+      UNWIND_HINT_EMPTY
-       swapgs
-       sysretq
--ENDPROC(native_usergs_sysret64)
-+END(native_usergs_sysret64)
- #endif /* CONFIG_PARAVIRT */
- 
- .macro TRACE_IRQS_IRETQ
-@@ -134,6 +136,7 @@ ENDPROC(native_usergs_sysret64)
-  */
- 
- ENTRY(entry_SYSCALL_64)
-+      UNWIND_HINT_EMPTY
-       /*
-        * Interrupts are off on entry.
-        * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
-@@ -169,6 +172,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
-       pushq   %r10                            /* pt_regs->r10 */
-       pushq   %r11                            /* pt_regs->r11 */
-       sub     $(6*8), %rsp                    /* pt_regs->bp, bx, r12-15 not saved */
-+      UNWIND_HINT_REGS extra=0
- 
-       /*
-        * If we need to do entry work or if we guess we'll need to do
-@@ -223,6 +227,7 @@ entry_SYSCALL_64_fastpath:
-       movq    EFLAGS(%rsp), %r11
-       RESTORE_C_REGS_EXCEPT_RCX_R11
-       movq    RSP(%rsp), %rsp
-+      UNWIND_HINT_EMPTY
-       USERGS_SYSRET64
- 
- 1:
-@@ -316,6 +321,7 @@ syscall_return_via_sysret:
-       /* rcx and r11 are already restored (see code above) */
-       RESTORE_C_REGS_EXCEPT_RCX_R11
-       movq    RSP(%rsp), %rsp
-+      UNWIND_HINT_EMPTY
-       USERGS_SYSRET64
- 
- opportunistic_sysret_failed:
-@@ -343,6 +349,7 @@ ENTRY(stub_ptregs_64)
-       DISABLE_INTERRUPTS(CLBR_ANY)
-       TRACE_IRQS_OFF
-       popq    %rax
-+      UNWIND_HINT_REGS extra=0
-       jmp     entry_SYSCALL64_slow_path
- 
- 1:
-@@ -351,6 +358,7 @@ END(stub_ptregs_64)
- 
- .macro ptregs_stub func
- ENTRY(ptregs_\func)
-+      UNWIND_HINT_FUNC
-       leaq    \func(%rip), %rax
-       jmp     stub_ptregs_64
- END(ptregs_\func)
-@@ -367,6 +375,7 @@ END(ptregs_\func)
-  * %rsi: next task
-  */
- ENTRY(__switch_to_asm)
-+      UNWIND_HINT_FUNC
-       /*
-        * Save callee-saved registers
-        * This must match the order in inactive_task_frame
-@@ -406,6 +415,7 @@ END(__switch_to_asm)
-  * r12: kernel thread arg
-  */
- ENTRY(ret_from_fork)
-+      UNWIND_HINT_EMPTY
-       movq    %rax, %rdi
-       call    schedule_tail                   /* rdi: 'prev' task parameter */
- 
-@@ -413,6 +423,7 @@ ENTRY(ret_from_fork)
-       jnz     1f                              /* kernel threads are uncommon */
- 
- 2:
-+      UNWIND_HINT_REGS
-       movq    %rsp, %rdi
-       call    syscall_return_slowpath /* returns with IRQs disabled */
-       TRACE_IRQS_ON                   /* user mode is traced as IRQS on */
-@@ -440,10 +451,11 @@ END(ret_from_fork)
- ENTRY(irq_entries_start)
-     vector=FIRST_EXTERNAL_VECTOR
-     .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
-+      UNWIND_HINT_IRET_REGS
-       pushq   $(~vector+0x80)                 /* Note: always in signed byte range */
--    vector=vector+1
-       jmp     common_interrupt
-       .align  8
-+      vector=vector+1
-     .endr
- END(irq_entries_start)
- 
-@@ -465,9 +477,14 @@ END(irq_entries_start)
-  *
-  * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
-  */
--.macro ENTER_IRQ_STACK old_rsp
-+.macro ENTER_IRQ_STACK regs=1 old_rsp
-       DEBUG_ENTRY_ASSERT_IRQS_OFF
-       movq    %rsp, \old_rsp
-+
-+      .if \regs
-+      UNWIND_HINT_REGS base=\old_rsp
-+      .endif
-+
-       incl    PER_CPU_VAR(irq_count)
-       jnz     .Lirq_stack_push_old_rsp_\@
- 
-@@ -504,16 +521,24 @@ END(irq_entries_start)
- 
- .Lirq_stack_push_old_rsp_\@:
-       pushq   \old_rsp
-+
-+      .if \regs
-+      UNWIND_HINT_REGS indirect=1
-+      .endif
- .endm
- 
- /*
-  * Undoes ENTER_IRQ_STACK.
-  */
--.macro LEAVE_IRQ_STACK
-+.macro LEAVE_IRQ_STACK regs=1
-       DEBUG_ENTRY_ASSERT_IRQS_OFF
-       /* We need to be off the IRQ stack before decrementing irq_count. */
-       popq    %rsp
- 
-+      .if \regs
-+      UNWIND_HINT_REGS
-+      .endif
-+
-       /*
-        * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
-        * the irq stack but we're not on it.
-@@ -624,6 +649,7 @@ restore_c_regs_and_iret:
-       INTERRUPT_RETURN
- 
- ENTRY(native_iret)
-+      UNWIND_HINT_IRET_REGS
-       /*
-        * Are we returning to a stack segment from the LDT?  Note: in
-        * 64-bit mode SS:RSP on the exception stack is always valid.
-@@ -696,6 +722,7 @@ native_irq_return_ldt:
-       orq     PER_CPU_VAR(espfix_stack), %rax
-       SWAPGS
-       movq    %rax, %rsp
-+      UNWIND_HINT_IRET_REGS offset=8
- 
-       /*
-        * At this point, we cannot write to the stack any more, but we can
-@@ -717,6 +744,7 @@ END(common_interrupt)
-  */
- .macro apicinterrupt3 num sym do_sym
- ENTRY(\sym)
-+      UNWIND_HINT_IRET_REGS
-       ASM_CLAC
-       pushq   $~(\num)
- .Lcommon_\sym:
-@@ -803,6 +831,8 @@ apicinterrupt IRQ_WORK_VECTOR                      irq_work_interrupt              smp_irq_work_interrupt
- 
- .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
- ENTRY(\sym)
-+      UNWIND_HINT_IRET_REGS offset=8
-+
-       /* Sanity check */
-       .if \shift_ist != -1 && \paranoid == 0
-       .error "using shift_ist requires paranoid=1"
-@@ -826,6 +856,7 @@ ENTRY(\sym)
-       .else
-       call    error_entry
-       .endif
-+      UNWIND_HINT_REGS
-       /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
- 
-       .if \paranoid
-@@ -923,6 +954,7 @@ idtentry simd_coprocessor_error            do_simd_coprocessor_error       has_error_code=0
-        * edi:  new selector
-        */
- ENTRY(native_load_gs_index)
-+      FRAME_BEGIN
-       pushfq
-       DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
-       SWAPGS
-@@ -931,8 +963,9 @@ ENTRY(native_load_gs_index)
- 2:    ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
-       SWAPGS
-       popfq
-+      FRAME_END
-       ret
--END(native_load_gs_index)
-+ENDPROC(native_load_gs_index)
- EXPORT_SYMBOL(native_load_gs_index)
- 
-       _ASM_EXTABLE(.Lgs_change, bad_gs)
-@@ -955,12 +988,12 @@ bad_gs:
- ENTRY(do_softirq_own_stack)
-       pushq   %rbp
-       mov     %rsp, %rbp
--      ENTER_IRQ_STACK old_rsp=%r11
-+      ENTER_IRQ_STACK regs=0 old_rsp=%r11
-       call    __do_softirq
--      LEAVE_IRQ_STACK
-+      LEAVE_IRQ_STACK regs=0
-       leaveq
-       ret
--END(do_softirq_own_stack)
-+ENDPROC(do_softirq_own_stack)
- 
- #ifdef CONFIG_XEN
- idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
-@@ -984,7 +1017,9 @@ ENTRY(xen_do_hypervisor_callback)         /* do_hypervisor_callback(struct *pt_regs) */
-  * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
-  * see the correct pointer to the pt_regs
-  */
-+      UNWIND_HINT_FUNC
-       movq    %rdi, %rsp                      /* we don't return, adjust the stack frame */
-+      UNWIND_HINT_REGS
- 
-       ENTER_IRQ_STACK old_rsp=%r10
-       call    xen_evtchn_do_upcall
-@@ -1010,6 +1045,7 @@ END(xen_do_hypervisor_callback)
-  * with its current contents: any discrepancy means we in category 1.
-  */
- ENTRY(xen_failsafe_callback)
-+      UNWIND_HINT_EMPTY
-       movl    %ds, %ecx
-       cmpw    %cx, 0x10(%rsp)
-       jne     1f
-@@ -1029,11 +1065,13 @@ ENTRY(xen_failsafe_callback)
-       pushq   $0                              /* RIP */
-       pushq   %r11
-       pushq   %rcx
-+      UNWIND_HINT_IRET_REGS offset=8
-       jmp     general_protection
- 1:    /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
-       movq    (%rsp), %rcx
-       movq    8(%rsp), %r11
-       addq    $0x30, %rsp
-+      UNWIND_HINT_IRET_REGS
-       pushq   $-1 /* orig_ax = -1 => not a system call */
-       ALLOC_PT_GPREGS_ON_STACK
-       SAVE_C_REGS
-@@ -1079,6 +1117,7 @@ idtentry machine_check                                   has_error_code=0        paranoid=1 do_sym=*machine_check_vec
-  * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
-  */
- ENTRY(paranoid_entry)
-+      UNWIND_HINT_FUNC
-       cld
-       SAVE_C_REGS 8
-       SAVE_EXTRA_REGS 8
-@@ -1106,6 +1145,7 @@ END(paranoid_entry)
-  * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
-  */
- ENTRY(paranoid_exit)
-+      UNWIND_HINT_REGS
-       DISABLE_INTERRUPTS(CLBR_ANY)
-       TRACE_IRQS_OFF_DEBUG
-       testl   %ebx, %ebx                      /* swapgs needed? */
-@@ -1127,6 +1167,7 @@ END(paranoid_exit)
-  * Return: EBX=0: came from user mode; EBX=1: otherwise
-  */
- ENTRY(error_entry)
-+      UNWIND_HINT_FUNC
-       cld
-       SAVE_C_REGS 8
-       SAVE_EXTRA_REGS 8
-@@ -1211,6 +1252,7 @@ END(error_entry)
-  *   0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
-  */
- ENTRY(error_exit)
-+      UNWIND_HINT_REGS
-       DISABLE_INTERRUPTS(CLBR_ANY)
-       TRACE_IRQS_OFF
-       testl   %ebx, %ebx
-@@ -1220,6 +1262,7 @@ END(error_exit)
- 
- /* Runs on exception stack */
- ENTRY(nmi)
-+      UNWIND_HINT_IRET_REGS
-       /*
-        * Fix up the exception frame if we're on Xen.
-        * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
-@@ -1293,11 +1336,13 @@ ENTRY(nmi)
-       cld
-       movq    %rsp, %rdx
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-+      UNWIND_HINT_IRET_REGS base=%rdx offset=8
-       pushq   5*8(%rdx)       /* pt_regs->ss */
-       pushq   4*8(%rdx)       /* pt_regs->rsp */
-       pushq   3*8(%rdx)       /* pt_regs->flags */
-       pushq   2*8(%rdx)       /* pt_regs->cs */
-       pushq   1*8(%rdx)       /* pt_regs->rip */
-+      UNWIND_HINT_IRET_REGS
-       pushq   $-1             /* pt_regs->orig_ax */
-       pushq   %rdi            /* pt_regs->di */
-       pushq   %rsi            /* pt_regs->si */
-@@ -1314,6 +1359,7 @@ ENTRY(nmi)
-       pushq   %r13            /* pt_regs->r13 */
-       pushq   %r14            /* pt_regs->r14 */
-       pushq   %r15            /* pt_regs->r15 */
-+      UNWIND_HINT_REGS
-       ENCODE_FRAME_POINTER
- 
-       /*
-@@ -1468,6 +1514,7 @@ first_nmi:
-       .rept 5
-       pushq   11*8(%rsp)
-       .endr
-+      UNWIND_HINT_IRET_REGS
- 
-       /* Everything up to here is safe from nested NMIs */
- 
-@@ -1483,6 +1530,7 @@ first_nmi:
-       pushq   $__KERNEL_CS    /* CS */
-       pushq   $1f             /* RIP */
-       INTERRUPT_RETURN        /* continues at repeat_nmi below */
-+      UNWIND_HINT_IRET_REGS
- 1:
- #endif
- 
-@@ -1532,6 +1580,7 @@ end_repeat_nmi:
-        * exceptions might do.
-        */
-       call    paranoid_entry
-+      UNWIND_HINT_REGS
- 
-       /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
-       movq    %rsp, %rdi
-@@ -1569,17 +1618,19 @@ nmi_restore:
- END(nmi)
- 
- ENTRY(ignore_sysret)
-+      UNWIND_HINT_EMPTY
-       mov     $-ENOSYS, %eax
-       sysret
- END(ignore_sysret)
- 
- ENTRY(rewind_stack_do_exit)
-+      UNWIND_HINT_FUNC
-       /* Prevent any naive code from trying to unwind to our caller. */
-       xorl    %ebp, %ebp
- 
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rax
--      leaq    -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp
-+      leaq    -PTREGS_SIZE(%rax), %rsp
-+      UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
- 
-       call    do_exit
--1:    jmp 1b
- END(rewind_stack_do_exit)
--- 
-2.14.2
-
diff --git a/patches/kernel/0022-x86-entry-64-Initialize-the-top-of-the-IRQ-stack-bef.patch b/patches/kernel/0022-x86-entry-64-Initialize-the-top-of-the-IRQ-stack-bef.patch

new file mode 100644 (file)

index 0000000..f50fc39
--- /dev/null
+++ b/patches/kernel/0022-x86-entry-64-Initialize-the-top-of-the-IRQ-stack-bef.patch
@@ -0,0 +1,94 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 11 Jul 2017 10:33:39 -0500
+Subject: [PATCH] x86/entry/64: Initialize the top of the IRQ stack before
+ switching stacks
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The OOPS unwinder wants the word at the top of the IRQ stack to
+point back to the previous stack at all times when the IRQ stack
+is in use.  There's currently a one-instruction window in ENTER_IRQ_STACK
+during which this isn't the case.  Fix it by writing the old RSP to the
+top of the IRQ stack before jumping.
+
+This currently writes the pointer to the stack twice, which is a bit
+ugly.  We could get rid of this by replacing irq_stack_ptr with
+irq_stack_ptr_minus_eight (better name welcome).  OTOH, there may be
+all kinds of odd microarchitectural considerations in play that
+affect performance by a few cycles here.
+
+Reported-by: Mike Galbraith <efault@gmx.de>
+Reported-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: live-patching@vger.kernel.org
+Link: http://lkml.kernel.org/r/aae7e79e49914808440ad5310ace138ced2179ca.1499786555.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 2995590964da93e1fd9a91550f9c9d9fab28f160)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit a753ff654dfd07a7f8d6f39a27126589eac7e55f)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S | 24 +++++++++++++++++++++++-
+ 1 file changed, 23 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 07b4056af8a8..184b70712545 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -469,6 +469,7 @@ END(irq_entries_start)
+       DEBUG_ENTRY_ASSERT_IRQS_OFF
+       movq    %rsp, \old_rsp
+       incl    PER_CPU_VAR(irq_count)
++      jnz     .Lirq_stack_push_old_rsp_\@
+ 
+       /*
+        * Right now, if we just incremented irq_count to zero, we've
+@@ -478,9 +479,30 @@ END(irq_entries_start)
+        * it must be *extremely* careful to limit its stack usage.  This
+        * could include kprobes and a hypothetical future IST-less #DB
+        * handler.
++       *
++       * The OOPS unwinder relies on the word at the top of the IRQ
++       * stack linking back to the previous RSP for the entire time we're
++       * on the IRQ stack.  For this to work reliably, we need to write
++       * it before we actually move ourselves to the IRQ stack.
++       */
++
++      movq    \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
++      movq    PER_CPU_VAR(irq_stack_ptr), %rsp
++
++#ifdef CONFIG_DEBUG_ENTRY
++      /*
++       * If the first movq above becomes wrong due to IRQ stack layout
++       * changes, the only way we'll notice is if we try to unwind right
++       * here.  Assert that we set up the stack right to catch this type
++       * of bug quickly.
+        */
++      cmpq    -8(%rsp), \old_rsp
++      je      .Lirq_stack_okay\@
++      ud2
++      .Lirq_stack_okay\@:
++#endif
+ 
+-      cmovzq  PER_CPU_VAR(irq_stack_ptr), %rsp
++.Lirq_stack_push_old_rsp_\@:
+       pushq   \old_rsp
+ .endm
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0023-x86-entry-64-Add-unwind-hint-annotations.patch b/patches/kernel/0023-x86-entry-64-Add-unwind-hint-annotations.patch

new file mode 100644 (file)

index 0000000..428fcf3
--- /dev/null
+++ b/patches/kernel/0023-x86-entry-64-Add-unwind-hint-annotations.patch
@@ -0,0 +1,463 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Tue, 11 Jul 2017 10:33:44 -0500
+Subject: [PATCH] x86/entry/64: Add unwind hint annotations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Add unwind hint annotations to entry_64.S.  This will enable the ORC
+unwinder to unwind through any location in the entry code including
+syscalls, interrupts, and exceptions.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: live-patching@vger.kernel.org
+Link: http://lkml.kernel.org/r/b9f6d478aadf68ba57c739dcfac34ec0dc021c4c.1499786555.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 8c1f75587a18ca032da8f6376d1ed882d7095289)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit a8448e6971c1e71b22c651131d14f8be76e6d399)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/Makefile   |  1 -
+ arch/x86/entry/calling.h  |  5 ++++
+ arch/x86/entry/entry_64.S | 71 ++++++++++++++++++++++++++++++++++++++++-------
+ 3 files changed, 66 insertions(+), 11 deletions(-)
+
+diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
+index 9976fcecd17e..af28a8a24366 100644
+--- a/arch/x86/entry/Makefile
++++ b/arch/x86/entry/Makefile
+@@ -2,7 +2,6 @@
+ # Makefile for the x86 low level entry code
+ #
+ 
+-OBJECT_FILES_NON_STANDARD_entry_$(BITS).o   := y
+ OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
+ 
+ CFLAGS_syscall_64.o           += $(call cc-option,-Wno-override-init,)
+diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
+index 05ed3d393da7..640aafebdc00 100644
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -1,4 +1,5 @@
+ #include <linux/jump_label.h>
++#include <asm/unwind_hints.h>
+ 
+ /*
+ 
+@@ -112,6 +113,7 @@ For 32-bit we have the following conventions - kernel is built with
+       movq %rdx, 12*8+\offset(%rsp)
+       movq %rsi, 13*8+\offset(%rsp)
+       movq %rdi, 14*8+\offset(%rsp)
++      UNWIND_HINT_REGS offset=\offset extra=0
+       .endm
+       .macro SAVE_C_REGS offset=0
+       SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
+@@ -136,6 +138,7 @@ For 32-bit we have the following conventions - kernel is built with
+       movq %r12, 3*8+\offset(%rsp)
+       movq %rbp, 4*8+\offset(%rsp)
+       movq %rbx, 5*8+\offset(%rsp)
++      UNWIND_HINT_REGS offset=\offset
+       .endm
+ 
+       .macro RESTORE_EXTRA_REGS offset=0
+@@ -145,6 +148,7 @@ For 32-bit we have the following conventions - kernel is built with
+       movq 3*8+\offset(%rsp), %r12
+       movq 4*8+\offset(%rsp), %rbp
+       movq 5*8+\offset(%rsp), %rbx
++      UNWIND_HINT_REGS offset=\offset extra=0
+       .endm
+ 
+       .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
+@@ -167,6 +171,7 @@ For 32-bit we have the following conventions - kernel is built with
+       .endif
+       movq 13*8(%rsp), %rsi
+       movq 14*8(%rsp), %rdi
++      UNWIND_HINT_IRET_REGS offset=16*8
+       .endm
+       .macro RESTORE_C_REGS
+       RESTORE_C_REGS_HELPER 1,1,1,1,1
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 184b70712545..64b233ab7cad 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -36,6 +36,7 @@
+ #include <asm/smap.h>
+ #include <asm/pgtable_types.h>
+ #include <asm/export.h>
++#include <asm/frame.h>
+ #include <linux/err.h>
+ 
+ .code64
+@@ -43,9 +44,10 @@
+ 
+ #ifdef CONFIG_PARAVIRT
+ ENTRY(native_usergs_sysret64)
++      UNWIND_HINT_EMPTY
+       swapgs
+       sysretq
+-ENDPROC(native_usergs_sysret64)
++END(native_usergs_sysret64)
+ #endif /* CONFIG_PARAVIRT */
+ 
+ .macro TRACE_IRQS_IRETQ
+@@ -134,6 +136,7 @@ ENDPROC(native_usergs_sysret64)
+  */
+ 
+ ENTRY(entry_SYSCALL_64)
++      UNWIND_HINT_EMPTY
+       /*
+        * Interrupts are off on entry.
+        * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+@@ -169,6 +172,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
+       pushq   %r10                            /* pt_regs->r10 */
+       pushq   %r11                            /* pt_regs->r11 */
+       sub     $(6*8), %rsp                    /* pt_regs->bp, bx, r12-15 not saved */
++      UNWIND_HINT_REGS extra=0
+ 
+       /*
+        * If we need to do entry work or if we guess we'll need to do
+@@ -223,6 +227,7 @@ entry_SYSCALL_64_fastpath:
+       movq    EFLAGS(%rsp), %r11
+       RESTORE_C_REGS_EXCEPT_RCX_R11
+       movq    RSP(%rsp), %rsp
++      UNWIND_HINT_EMPTY
+       USERGS_SYSRET64
+ 
+ 1:
+@@ -316,6 +321,7 @@ syscall_return_via_sysret:
+       /* rcx and r11 are already restored (see code above) */
+       RESTORE_C_REGS_EXCEPT_RCX_R11
+       movq    RSP(%rsp), %rsp
++      UNWIND_HINT_EMPTY
+       USERGS_SYSRET64
+ 
+ opportunistic_sysret_failed:
+@@ -343,6 +349,7 @@ ENTRY(stub_ptregs_64)
+       DISABLE_INTERRUPTS(CLBR_ANY)
+       TRACE_IRQS_OFF
+       popq    %rax
++      UNWIND_HINT_REGS extra=0
+       jmp     entry_SYSCALL64_slow_path
+ 
+ 1:
+@@ -351,6 +358,7 @@ END(stub_ptregs_64)
+ 
+ .macro ptregs_stub func
+ ENTRY(ptregs_\func)
++      UNWIND_HINT_FUNC
+       leaq    \func(%rip), %rax
+       jmp     stub_ptregs_64
+ END(ptregs_\func)
+@@ -367,6 +375,7 @@ END(ptregs_\func)
+  * %rsi: next task
+  */
+ ENTRY(__switch_to_asm)
++      UNWIND_HINT_FUNC
+       /*
+        * Save callee-saved registers
+        * This must match the order in inactive_task_frame
+@@ -406,6 +415,7 @@ END(__switch_to_asm)
+  * r12: kernel thread arg
+  */
+ ENTRY(ret_from_fork)
++      UNWIND_HINT_EMPTY
+       movq    %rax, %rdi
+       call    schedule_tail                   /* rdi: 'prev' task parameter */
+ 
+@@ -413,6 +423,7 @@ ENTRY(ret_from_fork)
+       jnz     1f                              /* kernel threads are uncommon */
+ 
+ 2:
++      UNWIND_HINT_REGS
+       movq    %rsp, %rdi
+       call    syscall_return_slowpath /* returns with IRQs disabled */
+       TRACE_IRQS_ON                   /* user mode is traced as IRQS on */
+@@ -440,10 +451,11 @@ END(ret_from_fork)
+ ENTRY(irq_entries_start)
+     vector=FIRST_EXTERNAL_VECTOR
+     .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
++      UNWIND_HINT_IRET_REGS
+       pushq   $(~vector+0x80)                 /* Note: always in signed byte range */
+-    vector=vector+1
+       jmp     common_interrupt
+       .align  8
++      vector=vector+1
+     .endr
+ END(irq_entries_start)
+ 
+@@ -465,9 +477,14 @@ END(irq_entries_start)
+  *
+  * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
+  */
+-.macro ENTER_IRQ_STACK old_rsp
++.macro ENTER_IRQ_STACK regs=1 old_rsp
+       DEBUG_ENTRY_ASSERT_IRQS_OFF
+       movq    %rsp, \old_rsp
++
++      .if \regs
++      UNWIND_HINT_REGS base=\old_rsp
++      .endif
++
+       incl    PER_CPU_VAR(irq_count)
+       jnz     .Lirq_stack_push_old_rsp_\@
+ 
+@@ -504,16 +521,24 @@ END(irq_entries_start)
+ 
+ .Lirq_stack_push_old_rsp_\@:
+       pushq   \old_rsp
++
++      .if \regs
++      UNWIND_HINT_REGS indirect=1
++      .endif
+ .endm
+ 
+ /*
+  * Undoes ENTER_IRQ_STACK.
+  */
+-.macro LEAVE_IRQ_STACK
++.macro LEAVE_IRQ_STACK regs=1
+       DEBUG_ENTRY_ASSERT_IRQS_OFF
+       /* We need to be off the IRQ stack before decrementing irq_count. */
+       popq    %rsp
+ 
++      .if \regs
++      UNWIND_HINT_REGS
++      .endif
++
+       /*
+        * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
+        * the irq stack but we're not on it.
+@@ -624,6 +649,7 @@ restore_c_regs_and_iret:
+       INTERRUPT_RETURN
+ 
+ ENTRY(native_iret)
++      UNWIND_HINT_IRET_REGS
+       /*
+        * Are we returning to a stack segment from the LDT?  Note: in
+        * 64-bit mode SS:RSP on the exception stack is always valid.
+@@ -696,6 +722,7 @@ native_irq_return_ldt:
+       orq     PER_CPU_VAR(espfix_stack), %rax
+       SWAPGS
+       movq    %rax, %rsp
++      UNWIND_HINT_IRET_REGS offset=8
+ 
+       /*
+        * At this point, we cannot write to the stack any more, but we can
+@@ -717,6 +744,7 @@ END(common_interrupt)
+  */
+ .macro apicinterrupt3 num sym do_sym
+ ENTRY(\sym)
++      UNWIND_HINT_IRET_REGS
+       ASM_CLAC
+       pushq   $~(\num)
+ .Lcommon_\sym:
+@@ -803,6 +831,8 @@ apicinterrupt IRQ_WORK_VECTOR                      irq_work_interrupt              smp_irq_work_interrupt
+ 
+ .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
+ ENTRY(\sym)
++      UNWIND_HINT_IRET_REGS offset=8
++
+       /* Sanity check */
+       .if \shift_ist != -1 && \paranoid == 0
+       .error "using shift_ist requires paranoid=1"
+@@ -826,6 +856,7 @@ ENTRY(\sym)
+       .else
+       call    error_entry
+       .endif
++      UNWIND_HINT_REGS
+       /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
+ 
+       .if \paranoid
+@@ -923,6 +954,7 @@ idtentry simd_coprocessor_error            do_simd_coprocessor_error       has_error_code=0
+        * edi:  new selector
+        */
+ ENTRY(native_load_gs_index)
++      FRAME_BEGIN
+       pushfq
+       DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
+       SWAPGS
+@@ -931,8 +963,9 @@ ENTRY(native_load_gs_index)
+ 2:    ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
+       SWAPGS
+       popfq
++      FRAME_END
+       ret
+-END(native_load_gs_index)
++ENDPROC(native_load_gs_index)
+ EXPORT_SYMBOL(native_load_gs_index)
+ 
+       _ASM_EXTABLE(.Lgs_change, bad_gs)
+@@ -955,12 +988,12 @@ bad_gs:
+ ENTRY(do_softirq_own_stack)
+       pushq   %rbp
+       mov     %rsp, %rbp
+-      ENTER_IRQ_STACK old_rsp=%r11
++      ENTER_IRQ_STACK regs=0 old_rsp=%r11
+       call    __do_softirq
+-      LEAVE_IRQ_STACK
++      LEAVE_IRQ_STACK regs=0
+       leaveq
+       ret
+-END(do_softirq_own_stack)
++ENDPROC(do_softirq_own_stack)
+ 
+ #ifdef CONFIG_XEN
+ idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
+@@ -984,7 +1017,9 @@ ENTRY(xen_do_hypervisor_callback)         /* do_hypervisor_callback(struct *pt_regs) */
+  * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
+  * see the correct pointer to the pt_regs
+  */
++      UNWIND_HINT_FUNC
+       movq    %rdi, %rsp                      /* we don't return, adjust the stack frame */
++      UNWIND_HINT_REGS
+ 
+       ENTER_IRQ_STACK old_rsp=%r10
+       call    xen_evtchn_do_upcall
+@@ -1010,6 +1045,7 @@ END(xen_do_hypervisor_callback)
+  * with its current contents: any discrepancy means we in category 1.
+  */
+ ENTRY(xen_failsafe_callback)
++      UNWIND_HINT_EMPTY
+       movl    %ds, %ecx
+       cmpw    %cx, 0x10(%rsp)
+       jne     1f
+@@ -1029,11 +1065,13 @@ ENTRY(xen_failsafe_callback)
+       pushq   $0                              /* RIP */
+       pushq   %r11
+       pushq   %rcx
++      UNWIND_HINT_IRET_REGS offset=8
+       jmp     general_protection
+ 1:    /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
+       movq    (%rsp), %rcx
+       movq    8(%rsp), %r11
+       addq    $0x30, %rsp
++      UNWIND_HINT_IRET_REGS
+       pushq   $-1 /* orig_ax = -1 => not a system call */
+       ALLOC_PT_GPREGS_ON_STACK
+       SAVE_C_REGS
+@@ -1079,6 +1117,7 @@ idtentry machine_check                                   has_error_code=0        paranoid=1 do_sym=*machine_check_vec
+  * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+  */
+ ENTRY(paranoid_entry)
++      UNWIND_HINT_FUNC
+       cld
+       SAVE_C_REGS 8
+       SAVE_EXTRA_REGS 8
+@@ -1106,6 +1145,7 @@ END(paranoid_entry)
+  * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
+  */
+ ENTRY(paranoid_exit)
++      UNWIND_HINT_REGS
+       DISABLE_INTERRUPTS(CLBR_ANY)
+       TRACE_IRQS_OFF_DEBUG
+       testl   %ebx, %ebx                      /* swapgs needed? */
+@@ -1127,6 +1167,7 @@ END(paranoid_exit)
+  * Return: EBX=0: came from user mode; EBX=1: otherwise
+  */
+ ENTRY(error_entry)
++      UNWIND_HINT_FUNC
+       cld
+       SAVE_C_REGS 8
+       SAVE_EXTRA_REGS 8
+@@ -1211,6 +1252,7 @@ END(error_entry)
+  *   0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
+  */
+ ENTRY(error_exit)
++      UNWIND_HINT_REGS
+       DISABLE_INTERRUPTS(CLBR_ANY)
+       TRACE_IRQS_OFF
+       testl   %ebx, %ebx
+@@ -1220,6 +1262,7 @@ END(error_exit)
+ 
+ /* Runs on exception stack */
+ ENTRY(nmi)
++      UNWIND_HINT_IRET_REGS
+       /*
+        * Fix up the exception frame if we're on Xen.
+        * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
+@@ -1293,11 +1336,13 @@ ENTRY(nmi)
+       cld
+       movq    %rsp, %rdx
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
++      UNWIND_HINT_IRET_REGS base=%rdx offset=8
+       pushq   5*8(%rdx)       /* pt_regs->ss */
+       pushq   4*8(%rdx)       /* pt_regs->rsp */
+       pushq   3*8(%rdx)       /* pt_regs->flags */
+       pushq   2*8(%rdx)       /* pt_regs->cs */
+       pushq   1*8(%rdx)       /* pt_regs->rip */
++      UNWIND_HINT_IRET_REGS
+       pushq   $-1             /* pt_regs->orig_ax */
+       pushq   %rdi            /* pt_regs->di */
+       pushq   %rsi            /* pt_regs->si */
+@@ -1314,6 +1359,7 @@ ENTRY(nmi)
+       pushq   %r13            /* pt_regs->r13 */
+       pushq   %r14            /* pt_regs->r14 */
+       pushq   %r15            /* pt_regs->r15 */
++      UNWIND_HINT_REGS
+       ENCODE_FRAME_POINTER
+ 
+       /*
+@@ -1468,6 +1514,7 @@ first_nmi:
+       .rept 5
+       pushq   11*8(%rsp)
+       .endr
++      UNWIND_HINT_IRET_REGS
+ 
+       /* Everything up to here is safe from nested NMIs */
+ 
+@@ -1483,6 +1530,7 @@ first_nmi:
+       pushq   $__KERNEL_CS    /* CS */
+       pushq   $1f             /* RIP */
+       INTERRUPT_RETURN        /* continues at repeat_nmi below */
++      UNWIND_HINT_IRET_REGS
+ 1:
+ #endif
+ 
+@@ -1532,6 +1580,7 @@ end_repeat_nmi:
+        * exceptions might do.
+        */
+       call    paranoid_entry
++      UNWIND_HINT_REGS
+ 
+       /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+       movq    %rsp, %rdi
+@@ -1569,17 +1618,19 @@ nmi_restore:
+ END(nmi)
+ 
+ ENTRY(ignore_sysret)
++      UNWIND_HINT_EMPTY
+       mov     $-ENOSYS, %eax
+       sysret
+ END(ignore_sysret)
+ 
+ ENTRY(rewind_stack_do_exit)
++      UNWIND_HINT_FUNC
+       /* Prevent any naive code from trying to unwind to our caller. */
+       xorl    %ebp, %ebp
+ 
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rax
+-      leaq    -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp
++      leaq    -PTREGS_SIZE(%rax), %rsp
++      UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
+ 
+       call    do_exit
+-1:    jmp 1b
+ END(rewind_stack_do_exit)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0023-xen-x86-Remove-SME-feature-in-PV-guests.patch b/patches/kernel/0023-xen-x86-Remove-SME-feature-in-PV-guests.patch

deleted file mode 100644 (file)

index bba2e33..0000000
--- a/patches/kernel/0023-xen-x86-Remove-SME-feature-in-PV-guests.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tom Lendacky <thomas.lendacky@amd.com>
-Date: Mon, 17 Jul 2017 16:10:29 -0500
-Subject: [PATCH] xen/x86: Remove SME feature in PV guests
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Xen does not currently support SME for PV guests. Clear the SME CPU
-capability in order to avoid any ambiguity.
-
-Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Reviewed-by: Juergen Gross <jgross@suse.com>
-Cc: <xen-devel@lists.xen.org>
-Cc: Alexander Potapenko <glider@google.com>
-Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Arnd Bergmann <arnd@arndb.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brijesh Singh <brijesh.singh@amd.com>
-Cc: Dave Young <dyoung@redhat.com>
-Cc: Dmitry Vyukov <dvyukov@google.com>
-Cc: Jonathan Corbet <corbet@lwn.net>
-Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-Cc: Larry Woodman <lwoodman@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Matt Fleming <matt@codeblueprint.co.uk>
-Cc: Michael S. Tsirkin <mst@redhat.com>
-Cc: Paolo Bonzini <pbonzini@redhat.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Radim Krčmář <rkrcmar@redhat.com>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Toshimitsu Kani <toshi.kani@hpe.com>
-Cc: kasan-dev@googlegroups.com
-Cc: kvm@vger.kernel.org
-Cc: linux-arch@vger.kernel.org
-Cc: linux-doc@vger.kernel.org
-Cc: linux-efi@vger.kernel.org
-Cc: linux-mm@kvack.org
-Link: http://lkml.kernel.org/r/3b605622a9fae5e588e5a13967120a18ec18071b.1500319216.git.thomas.lendacky@amd.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit f2f931c6819467af5260a21c59fb787ce2863f92)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 8370907399392a637a2e51b4db3368fb594db3a6)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/xen/enlighten_pv.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
-index 290bc5ac9852..df1921751aa5 100644
---- a/arch/x86/xen/enlighten_pv.c
-+++ b/arch/x86/xen/enlighten_pv.c
-@@ -263,6 +263,7 @@ static void __init xen_init_capabilities(void)
-       setup_clear_cpu_cap(X86_FEATURE_MTRR);
-       setup_clear_cpu_cap(X86_FEATURE_ACC);
-       setup_clear_cpu_cap(X86_FEATURE_X2APIC);
-+      setup_clear_cpu_cap(X86_FEATURE_SME);
- 
-       /*
-        * Xen PV would need some work to support PCID: CR3 handling as well
--- 
-2.14.2
-
diff --git a/patches/kernel/0024-x86-xen-64-Rearrange-the-SYSCALL-entries.patch b/patches/kernel/0024-x86-xen-64-Rearrange-the-SYSCALL-entries.patch

deleted file mode 100644 (file)

index c6898df..0000000
--- a/patches/kernel/0024-x86-xen-64-Rearrange-the-SYSCALL-entries.patch
+++ /dev/null
@@ -1,152 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 7 Aug 2017 20:59:21 -0700
-Subject: [PATCH] x86/xen/64: Rearrange the SYSCALL entries
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Xen's raw SYSCALL entries are much less weird than native.  Rather
-than fudging them to look like native entries, use the Xen-provided
-stack frame directly.
-
-This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
-the SWAPGS_UNSAFE_STACK paravirt hook.  The SYSENTER code would
-benefit from similar treatment.
-
-This makes one change to the native code path: the compat
-instruction that clears the high 32 bits of %rax is moved slightly
-later.  I'd be surprised if this affects performance at all.
-
-Tested-by: Juergen Gross <jgross@suse.com>
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Juergen Gross <jgross@suse.com>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: xen-devel@lists.xenproject.org
-Link: http://lkml.kernel.org/r/7c88ed36805d36841ab03ec3b48b4122c4418d71.1502164668.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 8a9949bc71a71b3dd633255ebe8f8869b1f73474)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit b8cec41ee5f30df5032cfe8c86103f7d92a89590)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S        |  9 ++-------
- arch/x86/entry/entry_64_compat.S |  7 +++----
- arch/x86/xen/xen-asm_64.S        | 23 +++++++++--------------
- 3 files changed, 14 insertions(+), 25 deletions(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 64b233ab7cad..4dbb336a1fdd 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -142,14 +142,8 @@ ENTRY(entry_SYSCALL_64)
-        * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
-        * it is too small to ever cause noticeable irq latency.
-        */
--      SWAPGS_UNSAFE_STACK
--      /*
--       * A hypervisor implementation might want to use a label
--       * after the swapgs, so that it can do the swapgs
--       * for the guest and jump here on syscall.
--       */
--GLOBAL(entry_SYSCALL_64_after_swapgs)
- 
-+      swapgs
-       movq    %rsp, PER_CPU_VAR(rsp_scratch)
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- 
-@@ -161,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
-       pushq   %r11                            /* pt_regs->flags */
-       pushq   $__USER_CS                      /* pt_regs->cs */
-       pushq   %rcx                            /* pt_regs->ip */
-+GLOBAL(entry_SYSCALL_64_after_hwframe)
-       pushq   %rax                            /* pt_regs->orig_ax */
-       pushq   %rdi                            /* pt_regs->di */
-       pushq   %rsi                            /* pt_regs->si */
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
-index e1721dafbcb1..5314d7b8e5ad 100644
---- a/arch/x86/entry/entry_64_compat.S
-+++ b/arch/x86/entry/entry_64_compat.S
-@@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
-  */
- ENTRY(entry_SYSCALL_compat)
-       /* Interrupts are off on entry. */
--      SWAPGS_UNSAFE_STACK
-+      swapgs
- 
-       /* Stash user ESP and switch to the kernel stack. */
-       movl    %esp, %r8d
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- 
--      /* Zero-extending 32-bit regs, do not remove */
--      movl    %eax, %eax
--
-       /* Construct struct pt_regs on stack */
-       pushq   $__USER32_DS            /* pt_regs->ss */
-       pushq   %r8                     /* pt_regs->sp */
-       pushq   %r11                    /* pt_regs->flags */
-       pushq   $__USER32_CS            /* pt_regs->cs */
-       pushq   %rcx                    /* pt_regs->ip */
-+GLOBAL(entry_SYSCALL_compat_after_hwframe)
-+      movl    %eax, %eax              /* discard orig_ax high bits */
-       pushq   %rax                    /* pt_regs->orig_ax */
-       pushq   %rdi                    /* pt_regs->di */
-       pushq   %rsi                    /* pt_regs->si */
-diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
-index c3df43141e70..a8a4f4c460a6 100644
---- a/arch/x86/xen/xen-asm_64.S
-+++ b/arch/x86/xen/xen-asm_64.S
-@@ -82,34 +82,29 @@ RELOC(xen_sysret64, 1b+1)
-  *    rip
-  *    r11
-  * rsp->rcx
-- *
-- * In all the entrypoints, we undo all that to make it look like a
-- * CPU-generated syscall/sysenter and jump to the normal entrypoint.
-  */
- 
--.macro undo_xen_syscall
--      mov 0*8(%rsp), %rcx
--      mov 1*8(%rsp), %r11
--      mov 5*8(%rsp), %rsp
--.endm
--
- /* Normal 64-bit system call target */
- ENTRY(xen_syscall_target)
--      undo_xen_syscall
--      jmp entry_SYSCALL_64_after_swapgs
-+      popq %rcx
-+      popq %r11
-+      jmp entry_SYSCALL_64_after_hwframe
- ENDPROC(xen_syscall_target)
- 
- #ifdef CONFIG_IA32_EMULATION
- 
- /* 32-bit compat syscall target */
- ENTRY(xen_syscall32_target)
--      undo_xen_syscall
--      jmp entry_SYSCALL_compat
-+      popq %rcx
-+      popq %r11
-+      jmp entry_SYSCALL_compat_after_hwframe
- ENDPROC(xen_syscall32_target)
- 
- /* 32-bit compat sysenter target */
- ENTRY(xen_sysenter_target)
--      undo_xen_syscall
-+      mov 0*8(%rsp), %rcx
-+      mov 1*8(%rsp), %r11
-+      mov 5*8(%rsp), %rsp
-       jmp entry_SYSENTER_compat
- ENDPROC(xen_sysenter_target)
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0024-xen-x86-Remove-SME-feature-in-PV-guests.patch b/patches/kernel/0024-xen-x86-Remove-SME-feature-in-PV-guests.patch

new file mode 100644 (file)

index 0000000..bba2e33
--- /dev/null
+++ b/patches/kernel/0024-xen-x86-Remove-SME-feature-in-PV-guests.patch
@@ -0,0 +1,70 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 17 Jul 2017 16:10:29 -0500
+Subject: [PATCH] xen/x86: Remove SME feature in PV guests
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Xen does not currently support SME for PV guests. Clear the SME CPU
+capability in order to avoid any ambiguity.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Cc: <xen-devel@lists.xen.org>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brijesh Singh <brijesh.singh@amd.com>
+Cc: Dave Young <dyoung@redhat.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Larry Woodman <lwoodman@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matt Fleming <matt@codeblueprint.co.uk>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Toshimitsu Kani <toshi.kani@hpe.com>
+Cc: kasan-dev@googlegroups.com
+Cc: kvm@vger.kernel.org
+Cc: linux-arch@vger.kernel.org
+Cc: linux-doc@vger.kernel.org
+Cc: linux-efi@vger.kernel.org
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/3b605622a9fae5e588e5a13967120a18ec18071b.1500319216.git.thomas.lendacky@amd.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit f2f931c6819467af5260a21c59fb787ce2863f92)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 8370907399392a637a2e51b4db3368fb594db3a6)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/xen/enlighten_pv.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
+index 290bc5ac9852..df1921751aa5 100644
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -263,6 +263,7 @@ static void __init xen_init_capabilities(void)
+       setup_clear_cpu_cap(X86_FEATURE_MTRR);
+       setup_clear_cpu_cap(X86_FEATURE_ACC);
+       setup_clear_cpu_cap(X86_FEATURE_X2APIC);
++      setup_clear_cpu_cap(X86_FEATURE_SME);
+ 
+       /*
+        * Xen PV would need some work to support PCID: CR3 handling as well
+-- 
+2.14.2
+
diff --git a/patches/kernel/0025-irq-Make-the-irqentry-text-section-unconditional.patch b/patches/kernel/0025-irq-Make-the-irqentry-text-section-unconditional.patch

deleted file mode 100644 (file)

index 4be6064..0000000
--- a/patches/kernel/0025-irq-Make-the-irqentry-text-section-unconditional.patch
+++ /dev/null
@@ -1,223 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Masami Hiramatsu <mhiramat@kernel.org>
-Date: Thu, 3 Aug 2017 11:38:21 +0900
-Subject: [PATCH] irq: Make the irqentry text section unconditional
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Generate irqentry and softirqentry text sections without
-any Kconfig dependencies. This will add extra sections, but
-there should be no performace impact.
-
-Suggested-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
-Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
-Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
-Cc: Chris Zankel <chris@zankel.net>
-Cc: David S . Miller <davem@davemloft.net>
-Cc: Francis Deslauriers <francis.deslauriers@efficios.com>
-Cc: Jesper Nilsson <jesper.nilsson@axis.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Max Filippov <jcmvbkbc@gmail.com>
-Cc: Mikael Starvik <starvik@axis.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
-Cc: linux-arch@vger.kernel.org
-Cc: linux-cris-kernel@axis.com
-Cc: mathieu.desnoyers@efficios.com
-Link: http://lkml.kernel.org/r/150172789110.27216.3955739126693102122.stgit@devbox
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 229a71860547ec856b156179a9c6bef2de426f66)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 8fd2f68cc93ae772cfddf4151d13448ff17d0229)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/arm/include/asm/traps.h      |  7 -------
- arch/arm64/include/asm/traps.h    |  7 -------
- include/asm-generic/sections.h    |  4 ++++
- include/asm-generic/vmlinux.lds.h |  8 --------
- include/linux/interrupt.h         | 14 +-------------
- arch/x86/kernel/unwind_frame.c    |  2 --
- arch/x86/entry/entry_64.S         |  9 ++-------
- 7 files changed, 7 insertions(+), 44 deletions(-)
-
-diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
-index f555bb3664dc..683d9230984a 100644
---- a/arch/arm/include/asm/traps.h
-+++ b/arch/arm/include/asm/traps.h
-@@ -18,7 +18,6 @@ struct undef_hook {
- void register_undef_hook(struct undef_hook *hook);
- void unregister_undef_hook(struct undef_hook *hook);
- 
--#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- static inline int __in_irqentry_text(unsigned long ptr)
- {
-       extern char __irqentry_text_start[];
-@@ -27,12 +26,6 @@ static inline int __in_irqentry_text(unsigned long ptr)
-       return ptr >= (unsigned long)&__irqentry_text_start &&
-              ptr < (unsigned long)&__irqentry_text_end;
- }
--#else
--static inline int __in_irqentry_text(unsigned long ptr)
--{
--      return 0;
--}
--#endif
- 
- static inline int in_exception_text(unsigned long ptr)
- {
-diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
-index 02e9035b0685..47a9066f7c86 100644
---- a/arch/arm64/include/asm/traps.h
-+++ b/arch/arm64/include/asm/traps.h
-@@ -37,18 +37,11 @@ void unregister_undef_hook(struct undef_hook *hook);
- 
- void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr);
- 
--#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- static inline int __in_irqentry_text(unsigned long ptr)
- {
-       return ptr >= (unsigned long)&__irqentry_text_start &&
-              ptr < (unsigned long)&__irqentry_text_end;
- }
--#else
--static inline int __in_irqentry_text(unsigned long ptr)
--{
--      return 0;
--}
--#endif
- 
- static inline int in_exception_text(unsigned long ptr)
- {
-diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
-index 532372c6cf15..e5da44eddd2f 100644
---- a/include/asm-generic/sections.h
-+++ b/include/asm-generic/sections.h
-@@ -27,6 +27,8 @@
-  *    __kprobes_text_start, __kprobes_text_end
-  *    __entry_text_start, __entry_text_end
-  *    __ctors_start, __ctors_end
-+ *    __irqentry_text_start, __irqentry_text_end
-+ *    __softirqentry_text_start, __softirqentry_text_end
-  */
- extern char _text[], _stext[], _etext[];
- extern char _data[], _sdata[], _edata[];
-@@ -39,6 +41,8 @@ extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
- extern char __kprobes_text_start[], __kprobes_text_end[];
- extern char __entry_text_start[], __entry_text_end[];
- extern char __start_rodata[], __end_rodata[];
-+extern char __irqentry_text_start[], __irqentry_text_end[];
-+extern char __softirqentry_text_start[], __softirqentry_text_end[];
- 
- /* Start and end of .ctors section - used for constructor calls. */
- extern char __ctors_start[], __ctors_end[];
-diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
-index 9623d78f8494..e7e955d4ab9e 100644
---- a/include/asm-generic/vmlinux.lds.h
-+++ b/include/asm-generic/vmlinux.lds.h
-@@ -497,25 +497,17 @@
-               *(.entry.text)                                          \
-               VMLINUX_SYMBOL(__entry_text_end) = .;
- 
--#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
- #define IRQENTRY_TEXT                                                 \
-               ALIGN_FUNCTION();                                       \
-               VMLINUX_SYMBOL(__irqentry_text_start) = .;              \
-               *(.irqentry.text)                                       \
-               VMLINUX_SYMBOL(__irqentry_text_end) = .;
--#else
--#define IRQENTRY_TEXT
--#endif
- 
--#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
- #define SOFTIRQENTRY_TEXT                                             \
-               ALIGN_FUNCTION();                                       \
-               VMLINUX_SYMBOL(__softirqentry_text_start) = .;          \
-               *(.softirqentry.text)                                   \
-               VMLINUX_SYMBOL(__softirqentry_text_end) = .;
--#else
--#define SOFTIRQENTRY_TEXT
--#endif
- 
- /* Section used for early init (in .S files) */
- #define HEAD_TEXT  *(.head.text)
-diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
-index a2fddddb0d60..59ba11661b6e 100644
---- a/include/linux/interrupt.h
-+++ b/include/linux/interrupt.h
-@@ -18,6 +18,7 @@
- #include <linux/atomic.h>
- #include <asm/ptrace.h>
- #include <asm/irq.h>
-+#include <asm/sections.h>
- 
- /*
-  * These correspond to the IORESOURCE_IRQ_* defines in
-@@ -726,7 +727,6 @@ extern int early_irq_init(void);
- extern int arch_probe_nr_irqs(void);
- extern int arch_early_irq_init(void);
- 
--#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
- /*
-  * We want to know which function is an entrypoint of a hardirq or a softirq.
-  */
-@@ -734,16 +734,4 @@ extern int arch_early_irq_init(void);
- #define __softirq_entry  \
-       __attribute__((__section__(".softirqentry.text")))
- 
--/* Limits of hardirq entrypoints */
--extern char __irqentry_text_start[];
--extern char __irqentry_text_end[];
--/* Limits of softirq entrypoints */
--extern char __softirqentry_text_start[];
--extern char __softirqentry_text_end[];
--
--#else
--#define __irq_entry
--#define __softirq_entry
--#endif
--
- #endif
-diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
-index b9389d72b2f7..c29e5bc7e9c9 100644
---- a/arch/x86/kernel/unwind_frame.c
-+++ b/arch/x86/kernel/unwind_frame.c
-@@ -91,10 +91,8 @@ static bool in_entry_code(unsigned long ip)
-       if (addr >= __entry_text_start && addr < __entry_text_end)
-               return true;
- 
--#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
-       if (addr >= __irqentry_text_start && addr < __irqentry_text_end)
-               return true;
--#endif
- 
-       return false;
- }
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 4dbb336a1fdd..ca0b250eefc4 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -761,13 +761,8 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)
- #endif
- 
- /* Make sure APIC interrupt handlers end up in the irqentry section: */
--#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
--# define PUSH_SECTION_IRQENTRY        .pushsection .irqentry.text, "ax"
--# define POP_SECTION_IRQENTRY .popsection
--#else
--# define PUSH_SECTION_IRQENTRY
--# define POP_SECTION_IRQENTRY
--#endif
-+#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
-+#define POP_SECTION_IRQENTRY  .popsection
- 
- .macro apicinterrupt num sym do_sym
- PUSH_SECTION_IRQENTRY
--- 
-2.14.2
-
diff --git a/patches/kernel/0025-x86-xen-64-Rearrange-the-SYSCALL-entries.patch b/patches/kernel/0025-x86-xen-64-Rearrange-the-SYSCALL-entries.patch

new file mode 100644 (file)

index 0000000..c6898df
--- /dev/null
+++ b/patches/kernel/0025-x86-xen-64-Rearrange-the-SYSCALL-entries.patch
@@ -0,0 +1,152 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 7 Aug 2017 20:59:21 -0700
+Subject: [PATCH] x86/xen/64: Rearrange the SYSCALL entries
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Xen's raw SYSCALL entries are much less weird than native.  Rather
+than fudging them to look like native entries, use the Xen-provided
+stack frame directly.
+
+This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
+the SWAPGS_UNSAFE_STACK paravirt hook.  The SYSENTER code would
+benefit from similar treatment.
+
+This makes one change to the native code path: the compat
+instruction that clears the high 32 bits of %rax is moved slightly
+later.  I'd be surprised if this affects performance at all.
+
+Tested-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: xen-devel@lists.xenproject.org
+Link: http://lkml.kernel.org/r/7c88ed36805d36841ab03ec3b48b4122c4418d71.1502164668.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 8a9949bc71a71b3dd633255ebe8f8869b1f73474)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit b8cec41ee5f30df5032cfe8c86103f7d92a89590)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S        |  9 ++-------
+ arch/x86/entry/entry_64_compat.S |  7 +++----
+ arch/x86/xen/xen-asm_64.S        | 23 +++++++++--------------
+ 3 files changed, 14 insertions(+), 25 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 64b233ab7cad..4dbb336a1fdd 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -142,14 +142,8 @@ ENTRY(entry_SYSCALL_64)
+        * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+        * it is too small to ever cause noticeable irq latency.
+        */
+-      SWAPGS_UNSAFE_STACK
+-      /*
+-       * A hypervisor implementation might want to use a label
+-       * after the swapgs, so that it can do the swapgs
+-       * for the guest and jump here on syscall.
+-       */
+-GLOBAL(entry_SYSCALL_64_after_swapgs)
+ 
++      swapgs
+       movq    %rsp, PER_CPU_VAR(rsp_scratch)
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ 
+@@ -161,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
+       pushq   %r11                            /* pt_regs->flags */
+       pushq   $__USER_CS                      /* pt_regs->cs */
+       pushq   %rcx                            /* pt_regs->ip */
++GLOBAL(entry_SYSCALL_64_after_hwframe)
+       pushq   %rax                            /* pt_regs->orig_ax */
+       pushq   %rdi                            /* pt_regs->di */
+       pushq   %rsi                            /* pt_regs->si */
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index e1721dafbcb1..5314d7b8e5ad 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
+  */
+ ENTRY(entry_SYSCALL_compat)
+       /* Interrupts are off on entry. */
+-      SWAPGS_UNSAFE_STACK
++      swapgs
+ 
+       /* Stash user ESP and switch to the kernel stack. */
+       movl    %esp, %r8d
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ 
+-      /* Zero-extending 32-bit regs, do not remove */
+-      movl    %eax, %eax
+-
+       /* Construct struct pt_regs on stack */
+       pushq   $__USER32_DS            /* pt_regs->ss */
+       pushq   %r8                     /* pt_regs->sp */
+       pushq   %r11                    /* pt_regs->flags */
+       pushq   $__USER32_CS            /* pt_regs->cs */
+       pushq   %rcx                    /* pt_regs->ip */
++GLOBAL(entry_SYSCALL_compat_after_hwframe)
++      movl    %eax, %eax              /* discard orig_ax high bits */
+       pushq   %rax                    /* pt_regs->orig_ax */
+       pushq   %rdi                    /* pt_regs->di */
+       pushq   %rsi                    /* pt_regs->si */
+diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
+index c3df43141e70..a8a4f4c460a6 100644
+--- a/arch/x86/xen/xen-asm_64.S
++++ b/arch/x86/xen/xen-asm_64.S
+@@ -82,34 +82,29 @@ RELOC(xen_sysret64, 1b+1)
+  *    rip
+  *    r11
+  * rsp->rcx
+- *
+- * In all the entrypoints, we undo all that to make it look like a
+- * CPU-generated syscall/sysenter and jump to the normal entrypoint.
+  */
+ 
+-.macro undo_xen_syscall
+-      mov 0*8(%rsp), %rcx
+-      mov 1*8(%rsp), %r11
+-      mov 5*8(%rsp), %rsp
+-.endm
+-
+ /* Normal 64-bit system call target */
+ ENTRY(xen_syscall_target)
+-      undo_xen_syscall
+-      jmp entry_SYSCALL_64_after_swapgs
++      popq %rcx
++      popq %r11
++      jmp entry_SYSCALL_64_after_hwframe
+ ENDPROC(xen_syscall_target)
+ 
+ #ifdef CONFIG_IA32_EMULATION
+ 
+ /* 32-bit compat syscall target */
+ ENTRY(xen_syscall32_target)
+-      undo_xen_syscall
+-      jmp entry_SYSCALL_compat
++      popq %rcx
++      popq %r11
++      jmp entry_SYSCALL_compat_after_hwframe
+ ENDPROC(xen_syscall32_target)
+ 
+ /* 32-bit compat sysenter target */
+ ENTRY(xen_sysenter_target)
+-      undo_xen_syscall
++      mov 0*8(%rsp), %rcx
++      mov 1*8(%rsp), %r11
++      mov 5*8(%rsp), %rsp
+       jmp entry_SYSENTER_compat
+ ENDPROC(xen_sysenter_target)
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0026-irq-Make-the-irqentry-text-section-unconditional.patch b/patches/kernel/0026-irq-Make-the-irqentry-text-section-unconditional.patch

new file mode 100644 (file)

index 0000000..4be6064
--- /dev/null
+++ b/patches/kernel/0026-irq-Make-the-irqentry-text-section-unconditional.patch
@@ -0,0 +1,223 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Thu, 3 Aug 2017 11:38:21 +0900
+Subject: [PATCH] irq: Make the irqentry text section unconditional
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Generate irqentry and softirqentry text sections without
+any Kconfig dependencies. This will add extra sections, but
+there should be no performance impact.
+
+Suggested-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
+Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+Cc: Chris Zankel <chris@zankel.net>
+Cc: David S . Miller <davem@davemloft.net>
+Cc: Francis Deslauriers <francis.deslauriers@efficios.com>
+Cc: Jesper Nilsson <jesper.nilsson@axis.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Max Filippov <jcmvbkbc@gmail.com>
+Cc: Mikael Starvik <starvik@axis.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
+Cc: linux-arch@vger.kernel.org
+Cc: linux-cris-kernel@axis.com
+Cc: mathieu.desnoyers@efficios.com
+Link: http://lkml.kernel.org/r/150172789110.27216.3955739126693102122.stgit@devbox
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 229a71860547ec856b156179a9c6bef2de426f66)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 8fd2f68cc93ae772cfddf4151d13448ff17d0229)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/arm/include/asm/traps.h      |  7 -------
+ arch/arm64/include/asm/traps.h    |  7 -------
+ include/asm-generic/sections.h    |  4 ++++
+ include/asm-generic/vmlinux.lds.h |  8 --------
+ include/linux/interrupt.h         | 14 +-------------
+ arch/x86/kernel/unwind_frame.c    |  2 --
+ arch/x86/entry/entry_64.S         |  9 ++-------
+ 7 files changed, 7 insertions(+), 44 deletions(-)
+
+diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
+index f555bb3664dc..683d9230984a 100644
+--- a/arch/arm/include/asm/traps.h
++++ b/arch/arm/include/asm/traps.h
+@@ -18,7 +18,6 @@ struct undef_hook {
+ void register_undef_hook(struct undef_hook *hook);
+ void unregister_undef_hook(struct undef_hook *hook);
+ 
+-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ static inline int __in_irqentry_text(unsigned long ptr)
+ {
+       extern char __irqentry_text_start[];
+@@ -27,12 +26,6 @@ static inline int __in_irqentry_text(unsigned long ptr)
+       return ptr >= (unsigned long)&__irqentry_text_start &&
+              ptr < (unsigned long)&__irqentry_text_end;
+ }
+-#else
+-static inline int __in_irqentry_text(unsigned long ptr)
+-{
+-      return 0;
+-}
+-#endif
+ 
+ static inline int in_exception_text(unsigned long ptr)
+ {
+diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
+index 02e9035b0685..47a9066f7c86 100644
+--- a/arch/arm64/include/asm/traps.h
++++ b/arch/arm64/include/asm/traps.h
+@@ -37,18 +37,11 @@ void unregister_undef_hook(struct undef_hook *hook);
+ 
+ void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr);
+ 
+-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ static inline int __in_irqentry_text(unsigned long ptr)
+ {
+       return ptr >= (unsigned long)&__irqentry_text_start &&
+              ptr < (unsigned long)&__irqentry_text_end;
+ }
+-#else
+-static inline int __in_irqentry_text(unsigned long ptr)
+-{
+-      return 0;
+-}
+-#endif
+ 
+ static inline int in_exception_text(unsigned long ptr)
+ {
+diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
+index 532372c6cf15..e5da44eddd2f 100644
+--- a/include/asm-generic/sections.h
++++ b/include/asm-generic/sections.h
+@@ -27,6 +27,8 @@
+  *    __kprobes_text_start, __kprobes_text_end
+  *    __entry_text_start, __entry_text_end
+  *    __ctors_start, __ctors_end
++ *    __irqentry_text_start, __irqentry_text_end
++ *    __softirqentry_text_start, __softirqentry_text_end
+  */
+ extern char _text[], _stext[], _etext[];
+ extern char _data[], _sdata[], _edata[];
+@@ -39,6 +41,8 @@ extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
+ extern char __kprobes_text_start[], __kprobes_text_end[];
+ extern char __entry_text_start[], __entry_text_end[];
+ extern char __start_rodata[], __end_rodata[];
++extern char __irqentry_text_start[], __irqentry_text_end[];
++extern char __softirqentry_text_start[], __softirqentry_text_end[];
+ 
+ /* Start and end of .ctors section - used for constructor calls. */
+ extern char __ctors_start[], __ctors_end[];
+diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
+index 9623d78f8494..e7e955d4ab9e 100644
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -497,25 +497,17 @@
+               *(.entry.text)                                          \
+               VMLINUX_SYMBOL(__entry_text_end) = .;
+ 
+-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+ #define IRQENTRY_TEXT                                                 \
+               ALIGN_FUNCTION();                                       \
+               VMLINUX_SYMBOL(__irqentry_text_start) = .;              \
+               *(.irqentry.text)                                       \
+               VMLINUX_SYMBOL(__irqentry_text_end) = .;
+-#else
+-#define IRQENTRY_TEXT
+-#endif
+ 
+-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+ #define SOFTIRQENTRY_TEXT                                             \
+               ALIGN_FUNCTION();                                       \
+               VMLINUX_SYMBOL(__softirqentry_text_start) = .;          \
+               *(.softirqentry.text)                                   \
+               VMLINUX_SYMBOL(__softirqentry_text_end) = .;
+-#else
+-#define SOFTIRQENTRY_TEXT
+-#endif
+ 
+ /* Section used for early init (in .S files) */
+ #define HEAD_TEXT  *(.head.text)
+diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
+index a2fddddb0d60..59ba11661b6e 100644
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -18,6 +18,7 @@
+ #include <linux/atomic.h>
+ #include <asm/ptrace.h>
+ #include <asm/irq.h>
++#include <asm/sections.h>
+ 
+ /*
+  * These correspond to the IORESOURCE_IRQ_* defines in
+@@ -726,7 +727,6 @@ extern int early_irq_init(void);
+ extern int arch_probe_nr_irqs(void);
+ extern int arch_early_irq_init(void);
+ 
+-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+ /*
+  * We want to know which function is an entrypoint of a hardirq or a softirq.
+  */
+@@ -734,16 +734,4 @@ extern int arch_early_irq_init(void);
+ #define __softirq_entry  \
+       __attribute__((__section__(".softirqentry.text")))
+ 
+-/* Limits of hardirq entrypoints */
+-extern char __irqentry_text_start[];
+-extern char __irqentry_text_end[];
+-/* Limits of softirq entrypoints */
+-extern char __softirqentry_text_start[];
+-extern char __softirqentry_text_end[];
+-
+-#else
+-#define __irq_entry
+-#define __softirq_entry
+-#endif
+-
+ #endif
+diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
+index b9389d72b2f7..c29e5bc7e9c9 100644
+--- a/arch/x86/kernel/unwind_frame.c
++++ b/arch/x86/kernel/unwind_frame.c
+@@ -91,10 +91,8 @@ static bool in_entry_code(unsigned long ip)
+       if (addr >= __entry_text_start && addr < __entry_text_end)
+               return true;
+ 
+-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+       if (addr >= __irqentry_text_start && addr < __irqentry_text_end)
+               return true;
+-#endif
+ 
+       return false;
+ }
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 4dbb336a1fdd..ca0b250eefc4 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -761,13 +761,8 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)
+ #endif
+ 
+ /* Make sure APIC interrupt handlers end up in the irqentry section: */
+-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+-# define PUSH_SECTION_IRQENTRY        .pushsection .irqentry.text, "ax"
+-# define POP_SECTION_IRQENTRY .popsection
+-#else
+-# define PUSH_SECTION_IRQENTRY
+-# define POP_SECTION_IRQENTRY
+-#endif
++#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
++#define POP_SECTION_IRQENTRY  .popsection
+ 
+ .macro apicinterrupt num sym do_sym
+ PUSH_SECTION_IRQENTRY
+-- 
+2.14.2
+
diff --git a/patches/kernel/0026-x86-xen-64-Fix-the-reported-SS-and-CS-in-SYSCALL.patch b/patches/kernel/0026-x86-xen-64-Fix-the-reported-SS-and-CS-in-SYSCALL.patch

deleted file mode 100644 (file)

index 345a513..0000000
--- a/patches/kernel/0026-x86-xen-64-Fix-the-reported-SS-and-CS-in-SYSCALL.patch
+++ /dev/null
@@ -1,84 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 14 Aug 2017 22:36:19 -0700
-Subject: [PATCH] x86/xen/64: Fix the reported SS and CS in SYSCALL
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-When I cleaned up the Xen SYSCALL entries, I inadvertently changed
-the reported segment registers.  Before my patch, regs->ss was
-__USER(32)_DS and regs->cs was __USER(32)_CS.  After the patch, they
-are FLAT_USER_CS/DS(32).
-
-This had a couple unfortunate effects.  It confused the
-opportunistic fast return logic.  It also significantly increased
-the risk of triggering a nasty glibc bug:
-
-  https://sourceware.org/bugzilla/show_bug.cgi?id=21269
-
-Update the Xen entry code to change it back.
-
-Reported-by: Brian Gerst <brgerst@gmail.com>
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Andrew Cooper <andrew.cooper3@citrix.com>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: xen-devel@lists.xenproject.org
-Fixes: 8a9949bc71a7 ("x86/xen/64: Rearrange the SYSCALL entries")
-Link: http://lkml.kernel.org/r/daba8351ea2764bb30272296ab9ce08a81bd8264.1502775273.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit fa2016a8e7d846b306e431646d250500e1da0c33)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 69a6ef3aeb274efe86fd74771830354f303ccc2f)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/xen/xen-asm_64.S | 18 ++++++++++++++++++
- 1 file changed, 18 insertions(+)
-
-diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
-index a8a4f4c460a6..c5fee2680abc 100644
---- a/arch/x86/xen/xen-asm_64.S
-+++ b/arch/x86/xen/xen-asm_64.S
-@@ -88,6 +88,15 @@ RELOC(xen_sysret64, 1b+1)
- ENTRY(xen_syscall_target)
-       popq %rcx
-       popq %r11
-+
-+      /*
-+       * Neither Xen nor the kernel really knows what the old SS and
-+       * CS were.  The kernel expects __USER_DS and __USER_CS, so
-+       * report those values even though Xen will guess its own values.
-+       */
-+      movq $__USER_DS, 4*8(%rsp)
-+      movq $__USER_CS, 1*8(%rsp)
-+
-       jmp entry_SYSCALL_64_after_hwframe
- ENDPROC(xen_syscall_target)
- 
-@@ -97,6 +106,15 @@ ENDPROC(xen_syscall_target)
- ENTRY(xen_syscall32_target)
-       popq %rcx
-       popq %r11
-+
-+      /*
-+       * Neither Xen nor the kernel really knows what the old SS and
-+       * CS were.  The kernel expects __USER32_DS and __USER32_CS, so
-+       * report those values even though Xen will guess its own values.
-+       */
-+      movq $__USER32_DS, 4*8(%rsp)
-+      movq $__USER32_CS, 1*8(%rsp)
-+
-       jmp entry_SYSCALL_compat_after_hwframe
- ENDPROC(xen_syscall32_target)
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0027-x86-paravirt-xen-Remove-xen_patch.patch b/patches/kernel/0027-x86-paravirt-xen-Remove-xen_patch.patch

deleted file mode 100644 (file)

index 009b9b3..0000000
--- a/patches/kernel/0027-x86-paravirt-xen-Remove-xen_patch.patch
+++ /dev/null
@@ -1,360 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Juergen Gross <jgross@suse.com>
-Date: Wed, 16 Aug 2017 19:31:56 +0200
-Subject: [PATCH] x86/paravirt/xen: Remove xen_patch()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Xen's paravirt patch function xen_patch() does some special casing for
-irq_ops functions to apply relocations when those functions can be
-patched inline instead of calls.
-
-Unfortunately none of the special case function replacements is small
-enough to be patched inline, so the special case never applies.
-
-As xen_patch() will call paravirt_patch_default() in all cases it can
-be just dropped. xen-asm.h doesn't seem necessary without xen_patch()
-as the only thing left in it would be the definition of XEN_EFLAGS_NMI
-used only once. So move that definition and remove xen-asm.h.
-
-Signed-off-by: Juergen Gross <jgross@suse.com>
-Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: boris.ostrovsky@oracle.com
-Cc: lguest@lists.ozlabs.org
-Cc: rusty@rustcorp.com.au
-Cc: xen-devel@lists.xenproject.org
-Link: http://lkml.kernel.org/r/20170816173157.8633-2-jgross@suse.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit edcb5cf84f05e5d2e2af25422a72ccde359fcca9)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit c96c9c712136a9e24a7aaf0aac4c149eee01bd8e)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/xen/xen-asm.h      | 12 ---------
- arch/x86/xen/xen-ops.h      | 15 +++---------
- arch/x86/xen/enlighten_pv.c | 59 +--------------------------------------------
- arch/x86/xen/xen-asm.S      | 26 +++++---------------
- arch/x86/xen/xen-asm_32.S   | 27 ++++-----------------
- arch/x86/xen/xen-asm_64.S   | 20 ++++-----------
- 6 files changed, 21 insertions(+), 138 deletions(-)
- delete mode 100644 arch/x86/xen/xen-asm.h
-
-diff --git a/arch/x86/xen/xen-asm.h b/arch/x86/xen/xen-asm.h
-deleted file mode 100644
-index 465276467a47..000000000000
---- a/arch/x86/xen/xen-asm.h
-+++ /dev/null
-@@ -1,12 +0,0 @@
--#ifndef _XEN_XEN_ASM_H
--#define _XEN_XEN_ASM_H
--
--#include <linux/linkage.h>
--
--#define RELOC(x, v)   .globl x##_reloc; x##_reloc=v
--#define ENDPATCH(x)   .globl x##_end; x##_end=.
--
--/* Pseudo-flag used for virtual NMI, which we don't implement yet */
--#define XEN_EFLAGS_NMI        0x80000000
--
--#endif
-diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
-index 0d5004477db6..70301ac0d414 100644
---- a/arch/x86/xen/xen-ops.h
-+++ b/arch/x86/xen/xen-ops.h
-@@ -129,17 +129,10 @@ static inline void __init xen_efi_init(void)
- }
- #endif
- 
--/* Declare an asm function, along with symbols needed to make it
--   inlineable */
--#define DECL_ASM(ret, name, ...)              \
--      __visible ret name(__VA_ARGS__);        \
--      extern char name##_end[] __visible;     \
--      extern char name##_reloc[] __visible
--
--DECL_ASM(void, xen_irq_enable_direct, void);
--DECL_ASM(void, xen_irq_disable_direct, void);
--DECL_ASM(unsigned long, xen_save_fl_direct, void);
--DECL_ASM(void, xen_restore_fl_direct, unsigned long);
-+__visible void xen_irq_enable_direct(void);
-+__visible void xen_irq_disable_direct(void);
-+__visible unsigned long xen_save_fl_direct(void);
-+__visible void xen_restore_fl_direct(unsigned long);
- 
- /* These are not functions, and cannot be called normally */
- __visible void xen_iret(void);
-diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
-index df1921751aa5..6c279c8f0a0e 100644
---- a/arch/x86/xen/enlighten_pv.c
-+++ b/arch/x86/xen/enlighten_pv.c
-@@ -988,59 +988,6 @@ void __ref xen_setup_vcpu_info_placement(void)
-       }
- }
- 
--static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
--                        unsigned long addr, unsigned len)
--{
--      char *start, *end, *reloc;
--      unsigned ret;
--
--      start = end = reloc = NULL;
--
--#define SITE(op, x)                                                   \
--      case PARAVIRT_PATCH(op.x):                                      \
--      if (xen_have_vcpu_info_placement) {                             \
--              start = (char *)xen_##x##_direct;                       \
--              end = xen_##x##_direct_end;                             \
--              reloc = xen_##x##_direct_reloc;                         \
--      }                                                               \
--      goto patch_site
--
--      switch (type) {
--              SITE(pv_irq_ops, irq_enable);
--              SITE(pv_irq_ops, irq_disable);
--              SITE(pv_irq_ops, save_fl);
--              SITE(pv_irq_ops, restore_fl);
--#undef SITE
--
--      patch_site:
--              if (start == NULL || (end-start) > len)
--                      goto default_patch;
--
--              ret = paravirt_patch_insns(insnbuf, len, start, end);
--
--              /* Note: because reloc is assigned from something that
--                 appears to be an array, gcc assumes it's non-null,
--                 but doesn't know its relationship with start and
--                 end. */
--              if (reloc > start && reloc < end) {
--                      int reloc_off = reloc - start;
--                      long *relocp = (long *)(insnbuf + reloc_off);
--                      long delta = start - (char *)addr;
--
--                      *relocp += delta;
--              }
--              break;
--
--      default_patch:
--      default:
--              ret = paravirt_patch_default(type, clobbers, insnbuf,
--                                           addr, len);
--              break;
--      }
--
--      return ret;
--}
--
- static const struct pv_info xen_info __initconst = {
-       .shared_kernel_pmd = 0,
- 
-@@ -1050,10 +997,6 @@ static const struct pv_info xen_info __initconst = {
-       .name = "Xen",
- };
- 
--static const struct pv_init_ops xen_init_ops __initconst = {
--      .patch = xen_patch,
--};
--
- static const struct pv_cpu_ops xen_cpu_ops __initconst = {
-       .cpuid = xen_cpuid,
- 
-@@ -1251,7 +1194,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
- 
-       /* Install Xen paravirt ops */
-       pv_info = xen_info;
--      pv_init_ops = xen_init_ops;
-+      pv_init_ops.patch = paravirt_patch_default;
-       pv_cpu_ops = xen_cpu_ops;
- 
-       x86_platform.get_nmi_reason = xen_get_nmi_reason;
-diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
-index eff224df813f..dcd31fa39b5d 100644
---- a/arch/x86/xen/xen-asm.S
-+++ b/arch/x86/xen/xen-asm.S
-@@ -1,14 +1,8 @@
- /*
-- * Asm versions of Xen pv-ops, suitable for either direct use or
-- * inlining.  The inline versions are the same as the direct-use
-- * versions, with the pre- and post-amble chopped off.
-- *
-- * This code is encoded for size rather than absolute efficiency, with
-- * a view to being able to inline as much as possible.
-+ * Asm versions of Xen pv-ops, suitable for direct use.
-  *
-  * We only bother with direct forms (ie, vcpu in percpu data) of the
-- * operations here; the indirect forms are better handled in C, since
-- * they're generally too large to inline anyway.
-+ * operations here; the indirect forms are better handled in C.
-  */
- 
- #include <asm/asm-offsets.h>
-@@ -16,7 +10,7 @@
- #include <asm/processor-flags.h>
- #include <asm/frame.h>
- 
--#include "xen-asm.h"
-+#include <linux/linkage.h>
- 
- /*
-  * Enable events.  This clears the event mask and tests the pending
-@@ -38,13 +32,11 @@ ENTRY(xen_irq_enable_direct)
-       testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
-       jz 1f
- 
--2:    call check_events
-+      call check_events
- 1:
--ENDPATCH(xen_irq_enable_direct)
-       FRAME_END
-       ret
-       ENDPROC(xen_irq_enable_direct)
--      RELOC(xen_irq_enable_direct, 2b+1)
- 
- 
- /*
-@@ -53,10 +45,8 @@ ENDPATCH(xen_irq_enable_direct)
-  */
- ENTRY(xen_irq_disable_direct)
-       movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
--ENDPATCH(xen_irq_disable_direct)
-       ret
--      ENDPROC(xen_irq_disable_direct)
--      RELOC(xen_irq_disable_direct, 0)
-+ENDPROC(xen_irq_disable_direct)
- 
- /*
-  * (xen_)save_fl is used to get the current interrupt enable status.
-@@ -71,10 +61,8 @@ ENTRY(xen_save_fl_direct)
-       testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
-       setz %ah
-       addb %ah, %ah
--ENDPATCH(xen_save_fl_direct)
-       ret
-       ENDPROC(xen_save_fl_direct)
--      RELOC(xen_save_fl_direct, 0)
- 
- 
- /*
-@@ -101,13 +89,11 @@ ENTRY(xen_restore_fl_direct)
-       /* check for unmasked and pending */
-       cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
-       jnz 1f
--2:    call check_events
-+      call check_events
- 1:
--ENDPATCH(xen_restore_fl_direct)
-       FRAME_END
-       ret
-       ENDPROC(xen_restore_fl_direct)
--      RELOC(xen_restore_fl_direct, 2b+1)
- 
- 
- /*
-diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
-index feb6d40a0860..1200e262a116 100644
---- a/arch/x86/xen/xen-asm_32.S
-+++ b/arch/x86/xen/xen-asm_32.S
-@@ -1,14 +1,8 @@
- /*
-- * Asm versions of Xen pv-ops, suitable for either direct use or
-- * inlining.  The inline versions are the same as the direct-use
-- * versions, with the pre- and post-amble chopped off.
-- *
-- * This code is encoded for size rather than absolute efficiency, with
-- * a view to being able to inline as much as possible.
-+ * Asm versions of Xen pv-ops, suitable for direct use.
-  *
-  * We only bother with direct forms (ie, vcpu in pda) of the
-- * operations here; the indirect forms are better handled in C, since
-- * they're generally too large to inline anyway.
-+ * operations here; the indirect forms are better handled in C.
-  */
- 
- #include <asm/thread_info.h>
-@@ -18,21 +12,10 @@
- 
- #include <xen/interface/xen.h>
- 
--#include "xen-asm.h"
-+#include <linux/linkage.h>
- 
--/*
-- * Force an event check by making a hypercall, but preserve regs
-- * before making the call.
-- */
--check_events:
--      push %eax
--      push %ecx
--      push %edx
--      call xen_force_evtchn_callback
--      pop %edx
--      pop %ecx
--      pop %eax
--      ret
-+/* Pseudo-flag used for virtual NMI, which we don't implement yet */
-+#define XEN_EFLAGS_NMI  0x80000000
- 
- /*
-  * This is run where a normal iret would be run, with the same stack setup:
-diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
-index c5fee2680abc..3a3b6a211584 100644
---- a/arch/x86/xen/xen-asm_64.S
-+++ b/arch/x86/xen/xen-asm_64.S
-@@ -1,14 +1,8 @@
- /*
-- * Asm versions of Xen pv-ops, suitable for either direct use or
-- * inlining.  The inline versions are the same as the direct-use
-- * versions, with the pre- and post-amble chopped off.
-- *
-- * This code is encoded for size rather than absolute efficiency, with
-- * a view to being able to inline as much as possible.
-+ * Asm versions of Xen pv-ops, suitable for direct use.
-  *
-  * We only bother with direct forms (ie, vcpu in pda) of the
-- * operations here; the indirect forms are better handled in C, since
-- * they're generally too large to inline anyway.
-+ * operations here; the indirect forms are better handled in C.
-  */
- 
- #include <asm/errno.h>
-@@ -20,7 +14,7 @@
- 
- #include <xen/interface/xen.h>
- 
--#include "xen-asm.h"
-+#include <linux/linkage.h>
- 
- ENTRY(xen_adjust_exception_frame)
-       mov 8+0(%rsp), %rcx
-@@ -46,9 +40,7 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
-  */
- ENTRY(xen_iret)
-       pushq $0
--1:    jmp hypercall_iret
--ENDPATCH(xen_iret)
--RELOC(xen_iret, 1b+1)
-+      jmp hypercall_iret
- 
- ENTRY(xen_sysret64)
-       /*
-@@ -65,9 +57,7 @@ ENTRY(xen_sysret64)
-       pushq %rcx
- 
-       pushq $VGCF_in_syscall
--1:    jmp hypercall_iret
--ENDPATCH(xen_sysret64)
--RELOC(xen_sysret64, 1b+1)
-+      jmp hypercall_iret
- 
- /*
-  * Xen handles syscall callbacks much like ordinary exceptions, which
--- 
-2.14.2
-
diff --git a/patches/kernel/0027-x86-xen-64-Fix-the-reported-SS-and-CS-in-SYSCALL.patch b/patches/kernel/0027-x86-xen-64-Fix-the-reported-SS-and-CS-in-SYSCALL.patch

new file mode 100644 (file)

index 0000000..345a513
--- /dev/null
+++ b/patches/kernel/0027-x86-xen-64-Fix-the-reported-SS-and-CS-in-SYSCALL.patch
@@ -0,0 +1,84 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 14 Aug 2017 22:36:19 -0700
+Subject: [PATCH] x86/xen/64: Fix the reported SS and CS in SYSCALL
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+When I cleaned up the Xen SYSCALL entries, I inadvertently changed
+the reported segment registers.  Before my patch, regs->ss was
+__USER(32)_DS and regs->cs was __USER(32)_CS.  After the patch, they
+are FLAT_USER_CS/DS(32).
+
+This had a couple unfortunate effects.  It confused the
+opportunistic fast return logic.  It also significantly increased
+the risk of triggering a nasty glibc bug:
+
+  https://sourceware.org/bugzilla/show_bug.cgi?id=21269
+
+Update the Xen entry code to change it back.
+
+Reported-by: Brian Gerst <brgerst@gmail.com>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andrew Cooper <andrew.cooper3@citrix.com>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: xen-devel@lists.xenproject.org
+Fixes: 8a9949bc71a7 ("x86/xen/64: Rearrange the SYSCALL entries")
+Link: http://lkml.kernel.org/r/daba8351ea2764bb30272296ab9ce08a81bd8264.1502775273.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit fa2016a8e7d846b306e431646d250500e1da0c33)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 69a6ef3aeb274efe86fd74771830354f303ccc2f)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/xen/xen-asm_64.S | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
+index a8a4f4c460a6..c5fee2680abc 100644
+--- a/arch/x86/xen/xen-asm_64.S
++++ b/arch/x86/xen/xen-asm_64.S
+@@ -88,6 +88,15 @@ RELOC(xen_sysret64, 1b+1)
+ ENTRY(xen_syscall_target)
+       popq %rcx
+       popq %r11
++
++      /*
++       * Neither Xen nor the kernel really knows what the old SS and
++       * CS were.  The kernel expects __USER_DS and __USER_CS, so
++       * report those values even though Xen will guess its own values.
++       */
++      movq $__USER_DS, 4*8(%rsp)
++      movq $__USER_CS, 1*8(%rsp)
++
+       jmp entry_SYSCALL_64_after_hwframe
+ ENDPROC(xen_syscall_target)
+ 
+@@ -97,6 +106,15 @@ ENDPROC(xen_syscall_target)
+ ENTRY(xen_syscall32_target)
+       popq %rcx
+       popq %r11
++
++      /*
++       * Neither Xen nor the kernel really knows what the old SS and
++       * CS were.  The kernel expects __USER32_DS and __USER32_CS, so
++       * report those values even though Xen will guess its own values.
++       */
++      movq $__USER32_DS, 4*8(%rsp)
++      movq $__USER32_CS, 1*8(%rsp)
++
+       jmp entry_SYSCALL_compat_after_hwframe
+ ENDPROC(xen_syscall32_target)
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0028-x86-paravirt-xen-Remove-xen_patch.patch b/patches/kernel/0028-x86-paravirt-xen-Remove-xen_patch.patch

new file mode 100644 (file)

index 0000000..009b9b3
--- /dev/null
+++ b/patches/kernel/0028-x86-paravirt-xen-Remove-xen_patch.patch
@@ -0,0 +1,360 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Wed, 16 Aug 2017 19:31:56 +0200
+Subject: [PATCH] x86/paravirt/xen: Remove xen_patch()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Xen's paravirt patch function xen_patch() does some special casing for
+irq_ops functions to apply relocations when those functions can be
+patched inline instead of calls.
+
+Unfortunately none of the special case function replacements is small
+enough to be patched inline, so the special case never applies.
+
+As xen_patch() will call paravirt_patch_default() in all cases it can
+be just dropped. xen-asm.h doesn't seem necessary without xen_patch()
+as the only thing left in it would be the definition of XEN_EFLAGS_NMI
+used only once. So move that definition and remove xen-asm.h.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: boris.ostrovsky@oracle.com
+Cc: lguest@lists.ozlabs.org
+Cc: rusty@rustcorp.com.au
+Cc: xen-devel@lists.xenproject.org
+Link: http://lkml.kernel.org/r/20170816173157.8633-2-jgross@suse.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit edcb5cf84f05e5d2e2af25422a72ccde359fcca9)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit c96c9c712136a9e24a7aaf0aac4c149eee01bd8e)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/xen/xen-asm.h      | 12 ---------
+ arch/x86/xen/xen-ops.h      | 15 +++---------
+ arch/x86/xen/enlighten_pv.c | 59 +--------------------------------------------
+ arch/x86/xen/xen-asm.S      | 26 +++++---------------
+ arch/x86/xen/xen-asm_32.S   | 27 ++++-----------------
+ arch/x86/xen/xen-asm_64.S   | 20 ++++-----------
+ 6 files changed, 21 insertions(+), 138 deletions(-)
+ delete mode 100644 arch/x86/xen/xen-asm.h
+
+diff --git a/arch/x86/xen/xen-asm.h b/arch/x86/xen/xen-asm.h
+deleted file mode 100644
+index 465276467a47..000000000000
+--- a/arch/x86/xen/xen-asm.h
++++ /dev/null
+@@ -1,12 +0,0 @@
+-#ifndef _XEN_XEN_ASM_H
+-#define _XEN_XEN_ASM_H
+-
+-#include <linux/linkage.h>
+-
+-#define RELOC(x, v)   .globl x##_reloc; x##_reloc=v
+-#define ENDPATCH(x)   .globl x##_end; x##_end=.
+-
+-/* Pseudo-flag used for virtual NMI, which we don't implement yet */
+-#define XEN_EFLAGS_NMI        0x80000000
+-
+-#endif
+diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
+index 0d5004477db6..70301ac0d414 100644
+--- a/arch/x86/xen/xen-ops.h
++++ b/arch/x86/xen/xen-ops.h
+@@ -129,17 +129,10 @@ static inline void __init xen_efi_init(void)
+ }
+ #endif
+ 
+-/* Declare an asm function, along with symbols needed to make it
+-   inlineable */
+-#define DECL_ASM(ret, name, ...)              \
+-      __visible ret name(__VA_ARGS__);        \
+-      extern char name##_end[] __visible;     \
+-      extern char name##_reloc[] __visible
+-
+-DECL_ASM(void, xen_irq_enable_direct, void);
+-DECL_ASM(void, xen_irq_disable_direct, void);
+-DECL_ASM(unsigned long, xen_save_fl_direct, void);
+-DECL_ASM(void, xen_restore_fl_direct, unsigned long);
++__visible void xen_irq_enable_direct(void);
++__visible void xen_irq_disable_direct(void);
++__visible unsigned long xen_save_fl_direct(void);
++__visible void xen_restore_fl_direct(unsigned long);
+ 
+ /* These are not functions, and cannot be called normally */
+ __visible void xen_iret(void);
+diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
+index df1921751aa5..6c279c8f0a0e 100644
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -988,59 +988,6 @@ void __ref xen_setup_vcpu_info_placement(void)
+       }
+ }
+ 
+-static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
+-                        unsigned long addr, unsigned len)
+-{
+-      char *start, *end, *reloc;
+-      unsigned ret;
+-
+-      start = end = reloc = NULL;
+-
+-#define SITE(op, x)                                                   \
+-      case PARAVIRT_PATCH(op.x):                                      \
+-      if (xen_have_vcpu_info_placement) {                             \
+-              start = (char *)xen_##x##_direct;                       \
+-              end = xen_##x##_direct_end;                             \
+-              reloc = xen_##x##_direct_reloc;                         \
+-      }                                                               \
+-      goto patch_site
+-
+-      switch (type) {
+-              SITE(pv_irq_ops, irq_enable);
+-              SITE(pv_irq_ops, irq_disable);
+-              SITE(pv_irq_ops, save_fl);
+-              SITE(pv_irq_ops, restore_fl);
+-#undef SITE
+-
+-      patch_site:
+-              if (start == NULL || (end-start) > len)
+-                      goto default_patch;
+-
+-              ret = paravirt_patch_insns(insnbuf, len, start, end);
+-
+-              /* Note: because reloc is assigned from something that
+-                 appears to be an array, gcc assumes it's non-null,
+-                 but doesn't know its relationship with start and
+-                 end. */
+-              if (reloc > start && reloc < end) {
+-                      int reloc_off = reloc - start;
+-                      long *relocp = (long *)(insnbuf + reloc_off);
+-                      long delta = start - (char *)addr;
+-
+-                      *relocp += delta;
+-              }
+-              break;
+-
+-      default_patch:
+-      default:
+-              ret = paravirt_patch_default(type, clobbers, insnbuf,
+-                                           addr, len);
+-              break;
+-      }
+-
+-      return ret;
+-}
+-
+ static const struct pv_info xen_info __initconst = {
+       .shared_kernel_pmd = 0,
+ 
+@@ -1050,10 +997,6 @@ static const struct pv_info xen_info __initconst = {
+       .name = "Xen",
+ };
+ 
+-static const struct pv_init_ops xen_init_ops __initconst = {
+-      .patch = xen_patch,
+-};
+-
+ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
+       .cpuid = xen_cpuid,
+ 
+@@ -1251,7 +1194,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
+ 
+       /* Install Xen paravirt ops */
+       pv_info = xen_info;
+-      pv_init_ops = xen_init_ops;
++      pv_init_ops.patch = paravirt_patch_default;
+       pv_cpu_ops = xen_cpu_ops;
+ 
+       x86_platform.get_nmi_reason = xen_get_nmi_reason;
+diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
+index eff224df813f..dcd31fa39b5d 100644
+--- a/arch/x86/xen/xen-asm.S
++++ b/arch/x86/xen/xen-asm.S
+@@ -1,14 +1,8 @@
+ /*
+- * Asm versions of Xen pv-ops, suitable for either direct use or
+- * inlining.  The inline versions are the same as the direct-use
+- * versions, with the pre- and post-amble chopped off.
+- *
+- * This code is encoded for size rather than absolute efficiency, with
+- * a view to being able to inline as much as possible.
++ * Asm versions of Xen pv-ops, suitable for direct use.
+  *
+  * We only bother with direct forms (ie, vcpu in percpu data) of the
+- * operations here; the indirect forms are better handled in C, since
+- * they're generally too large to inline anyway.
++ * operations here; the indirect forms are better handled in C.
+  */
+ 
+ #include <asm/asm-offsets.h>
+@@ -16,7 +10,7 @@
+ #include <asm/processor-flags.h>
+ #include <asm/frame.h>
+ 
+-#include "xen-asm.h"
++#include <linux/linkage.h>
+ 
+ /*
+  * Enable events.  This clears the event mask and tests the pending
+@@ -38,13 +32,11 @@ ENTRY(xen_irq_enable_direct)
+       testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
+       jz 1f
+ 
+-2:    call check_events
++      call check_events
+ 1:
+-ENDPATCH(xen_irq_enable_direct)
+       FRAME_END
+       ret
+       ENDPROC(xen_irq_enable_direct)
+-      RELOC(xen_irq_enable_direct, 2b+1)
+ 
+ 
+ /*
+@@ -53,10 +45,8 @@ ENDPATCH(xen_irq_enable_direct)
+  */
+ ENTRY(xen_irq_disable_direct)
+       movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+-ENDPATCH(xen_irq_disable_direct)
+       ret
+-      ENDPROC(xen_irq_disable_direct)
+-      RELOC(xen_irq_disable_direct, 0)
++ENDPROC(xen_irq_disable_direct)
+ 
+ /*
+  * (xen_)save_fl is used to get the current interrupt enable status.
+@@ -71,10 +61,8 @@ ENTRY(xen_save_fl_direct)
+       testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+       setz %ah
+       addb %ah, %ah
+-ENDPATCH(xen_save_fl_direct)
+       ret
+       ENDPROC(xen_save_fl_direct)
+-      RELOC(xen_save_fl_direct, 0)
+ 
+ 
+ /*
+@@ -101,13 +89,11 @@ ENTRY(xen_restore_fl_direct)
+       /* check for unmasked and pending */
+       cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
+       jnz 1f
+-2:    call check_events
++      call check_events
+ 1:
+-ENDPATCH(xen_restore_fl_direct)
+       FRAME_END
+       ret
+       ENDPROC(xen_restore_fl_direct)
+-      RELOC(xen_restore_fl_direct, 2b+1)
+ 
+ 
+ /*
+diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
+index feb6d40a0860..1200e262a116 100644
+--- a/arch/x86/xen/xen-asm_32.S
++++ b/arch/x86/xen/xen-asm_32.S
+@@ -1,14 +1,8 @@
+ /*
+- * Asm versions of Xen pv-ops, suitable for either direct use or
+- * inlining.  The inline versions are the same as the direct-use
+- * versions, with the pre- and post-amble chopped off.
+- *
+- * This code is encoded for size rather than absolute efficiency, with
+- * a view to being able to inline as much as possible.
++ * Asm versions of Xen pv-ops, suitable for direct use.
+  *
+  * We only bother with direct forms (ie, vcpu in pda) of the
+- * operations here; the indirect forms are better handled in C, since
+- * they're generally too large to inline anyway.
++ * operations here; the indirect forms are better handled in C.
+  */
+ 
+ #include <asm/thread_info.h>
+@@ -18,21 +12,10 @@
+ 
+ #include <xen/interface/xen.h>
+ 
+-#include "xen-asm.h"
++#include <linux/linkage.h>
+ 
+-/*
+- * Force an event check by making a hypercall, but preserve regs
+- * before making the call.
+- */
+-check_events:
+-      push %eax
+-      push %ecx
+-      push %edx
+-      call xen_force_evtchn_callback
+-      pop %edx
+-      pop %ecx
+-      pop %eax
+-      ret
++/* Pseudo-flag used for virtual NMI, which we don't implement yet */
++#define XEN_EFLAGS_NMI  0x80000000
+ 
+ /*
+  * This is run where a normal iret would be run, with the same stack setup:
+diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
+index c5fee2680abc..3a3b6a211584 100644
+--- a/arch/x86/xen/xen-asm_64.S
++++ b/arch/x86/xen/xen-asm_64.S
+@@ -1,14 +1,8 @@
+ /*
+- * Asm versions of Xen pv-ops, suitable for either direct use or
+- * inlining.  The inline versions are the same as the direct-use
+- * versions, with the pre- and post-amble chopped off.
+- *
+- * This code is encoded for size rather than absolute efficiency, with
+- * a view to being able to inline as much as possible.
++ * Asm versions of Xen pv-ops, suitable for direct use.
+  *
+  * We only bother with direct forms (ie, vcpu in pda) of the
+- * operations here; the indirect forms are better handled in C, since
+- * they're generally too large to inline anyway.
++ * operations here; the indirect forms are better handled in C.
+  */
+ 
+ #include <asm/errno.h>
+@@ -20,7 +14,7 @@
+ 
+ #include <xen/interface/xen.h>
+ 
+-#include "xen-asm.h"
++#include <linux/linkage.h>
+ 
+ ENTRY(xen_adjust_exception_frame)
+       mov 8+0(%rsp), %rcx
+@@ -46,9 +40,7 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
+  */
+ ENTRY(xen_iret)
+       pushq $0
+-1:    jmp hypercall_iret
+-ENDPATCH(xen_iret)
+-RELOC(xen_iret, 1b+1)
++      jmp hypercall_iret
+ 
+ ENTRY(xen_sysret64)
+       /*
+@@ -65,9 +57,7 @@ ENTRY(xen_sysret64)
+       pushq %rcx
+ 
+       pushq $VGCF_in_syscall
+-1:    jmp hypercall_iret
+-ENDPATCH(xen_sysret64)
+-RELOC(xen_sysret64, 1b+1)
++      jmp hypercall_iret
+ 
+ /*
+  * Xen handles syscall callbacks much like ordinary exceptions, which
+-- 
+2.14.2
+
diff --git a/patches/kernel/0028-x86-traps-Simplify-pagefault-tracing-logic.patch b/patches/kernel/0028-x86-traps-Simplify-pagefault-tracing-logic.patch

deleted file mode 100644 (file)

index 801d82f..0000000
--- a/patches/kernel/0028-x86-traps-Simplify-pagefault-tracing-logic.patch
+++ /dev/null
@@ -1,218 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon, 28 Aug 2017 08:47:22 +0200
-Subject: [PATCH] x86/traps: Simplify pagefault tracing logic
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Make use of the new irqvector tracing static key and remove the duplicated
-trace_do_page_fault() implementation.
-
-If irq vector tracing is disabled, then the overhead of this is a single
-NOP5, which is a reasonable tradeoff to avoid duplicated code and the
-unholy macro mess.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Steven Rostedt <rostedt@goodmis.org>
-Link: http://lkml.kernel.org/r/20170828064956.672965407@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 11a7ffb01703c3bbb1e9b968893f4487a1b0b5a8)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 8478bb5608747fd64c9fd4a2f5422fb4af756a50)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/traps.h | 10 +--------
- arch/x86/kernel/kvm.c        |  2 +-
- arch/x86/mm/fault.c          | 49 ++++++++++++--------------------------------
- arch/x86/entry/entry_32.S    |  8 --------
- arch/x86/entry/entry_64.S    | 13 +-----------
- 5 files changed, 16 insertions(+), 66 deletions(-)
-
-diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
-index 01fd0a7f48cd..b4f322d6c95f 100644
---- a/arch/x86/include/asm/traps.h
-+++ b/arch/x86/include/asm/traps.h
-@@ -39,7 +39,6 @@ asmlinkage void machine_check(void);
- asmlinkage void simd_coprocessor_error(void);
- 
- #ifdef CONFIG_TRACING
--asmlinkage void trace_page_fault(void);
- #define trace_stack_segment stack_segment
- #define trace_divide_error divide_error
- #define trace_bounds bounds
-@@ -54,6 +53,7 @@ asmlinkage void trace_page_fault(void);
- #define trace_alignment_check alignment_check
- #define trace_simd_coprocessor_error simd_coprocessor_error
- #define trace_async_page_fault async_page_fault
-+#define trace_page_fault page_fault
- #endif
- 
- dotraplinkage void do_divide_error(struct pt_regs *, long);
-@@ -74,14 +74,6 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
- #endif
- dotraplinkage void do_general_protection(struct pt_regs *, long);
- dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
--#ifdef CONFIG_TRACING
--dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long);
--#else
--static inline void trace_do_page_fault(struct pt_regs *regs, unsigned long error)
--{
--      do_page_fault(regs, error);
--}
--#endif
- dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);
- dotraplinkage void do_coprocessor_error(struct pt_regs *, long);
- dotraplinkage void do_alignment_check(struct pt_regs *, long);
-diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
-index e5e4306e4546..9e3798b00e40 100644
---- a/arch/x86/kernel/kvm.c
-+++ b/arch/x86/kernel/kvm.c
-@@ -270,7 +270,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
- 
-       switch (kvm_read_and_reset_pf_reason()) {
-       default:
--              trace_do_page_fault(regs, error_code);
-+              do_page_fault(regs, error_code);
-               break;
-       case KVM_PV_REASON_PAGE_NOT_PRESENT:
-               /* page is swapped out by the host. */
-diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
-index 955be01dd9cc..4ee9eb916826 100644
---- a/arch/x86/mm/fault.c
-+++ b/arch/x86/mm/fault.c
-@@ -1253,10 +1253,6 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
-  * This routine handles page faults.  It determines the address,
-  * and the problem, and then passes it off to one of the appropriate
-  * routines.
-- *
-- * This function must have noinline because both callers
-- * {,trace_}do_page_fault() have notrace on. Having this an actual function
-- * guarantees there's a function trace entry.
-  */
- static noinline void
- __do_page_fault(struct pt_regs *regs, unsigned long error_code,
-@@ -1491,27 +1487,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
- }
- NOKPROBE_SYMBOL(__do_page_fault);
- 
--dotraplinkage void notrace
--do_page_fault(struct pt_regs *regs, unsigned long error_code)
--{
--      unsigned long address = read_cr2(); /* Get the faulting address */
--      enum ctx_state prev_state;
--
--      /*
--       * We must have this function tagged with __kprobes, notrace and call
--       * read_cr2() before calling anything else. To avoid calling any kind
--       * of tracing machinery before we've observed the CR2 value.
--       *
--       * exception_{enter,exit}() contain all sorts of tracepoints.
--       */
--
--      prev_state = exception_enter();
--      __do_page_fault(regs, error_code, address);
--      exception_exit(prev_state);
--}
--NOKPROBE_SYMBOL(do_page_fault);
--
--#ifdef CONFIG_TRACING
- static nokprobe_inline void
- trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
-                        unsigned long error_code)
-@@ -1522,22 +1497,24 @@ trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
-               trace_page_fault_kernel(address, regs, error_code);
- }
- 
-+/*
-+ * We must have this function blacklisted from kprobes, tagged with notrace
-+ * and call read_cr2() before calling anything else. To avoid calling any
-+ * kind of tracing machinery before we've observed the CR2 value.
-+ *
-+ * exception_{enter,exit}() contains all sorts of tracepoints.
-+ */
- dotraplinkage void notrace
--trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
-+do_page_fault(struct pt_regs *regs, unsigned long error_code)
- {
--      /*
--       * The exception_enter and tracepoint processing could
--       * trigger another page faults (user space callchain
--       * reading) and destroy the original cr2 value, so read
--       * the faulting address now.
--       */
--      unsigned long address = read_cr2();
-+      unsigned long address = read_cr2(); /* Get the faulting address */
-       enum ctx_state prev_state;
- 
-       prev_state = exception_enter();
--      trace_page_fault_entries(address, regs, error_code);
-+      if (trace_irqvectors_enabled())
-+              trace_page_fault_entries(address, regs, error_code);
-+
-       __do_page_fault(regs, error_code, address);
-       exception_exit(prev_state);
- }
--NOKPROBE_SYMBOL(trace_do_page_fault);
--#endif /* CONFIG_TRACING */
-+NOKPROBE_SYMBOL(do_page_fault);
-diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
-index 48ef7bb32c42..0092da1c056f 100644
---- a/arch/x86/entry/entry_32.S
-+++ b/arch/x86/entry/entry_32.S
-@@ -891,14 +891,6 @@ BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
- 
- #endif /* CONFIG_HYPERV */
- 
--#ifdef CONFIG_TRACING
--ENTRY(trace_page_fault)
--      ASM_CLAC
--      pushl   $trace_do_page_fault
--      jmp     common_exception
--END(trace_page_fault)
--#endif
--
- ENTRY(page_fault)
-       ASM_CLAC
-       pushl   $do_page_fault
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index ca0b250eefc4..dfabcbf8e813 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -913,17 +913,6 @@ ENTRY(\sym)
- END(\sym)
- .endm
- 
--#ifdef CONFIG_TRACING
--.macro trace_idtentry sym do_sym has_error_code:req
--idtentry trace(\sym) trace(\do_sym) has_error_code=\has_error_code
--idtentry \sym \do_sym has_error_code=\has_error_code
--.endm
--#else
--.macro trace_idtentry sym do_sym has_error_code:req
--idtentry \sym \do_sym has_error_code=\has_error_code
--.endm
--#endif
--
- idtentry divide_error                 do_divide_error                 has_error_code=0
- idtentry overflow                     do_overflow                     has_error_code=0
- idtentry bounds                               do_bounds                       has_error_code=0
-@@ -1091,7 +1080,7 @@ idtentry xen_stack_segment       do_stack_segment        has_error_code=1
- #endif
- 
- idtentry general_protection   do_general_protection   has_error_code=1
--trace_idtentry page_fault     do_page_fault           has_error_code=1
-+idtentry page_fault           do_page_fault           has_error_code=1
- 
- #ifdef CONFIG_KVM_GUEST
- idtentry async_page_fault     do_async_page_fault     has_error_code=1
--- 
-2.14.2
-
diff --git a/patches/kernel/0029-x86-idt-Unify-gate_struct-handling-for-32-64-bit-ker.patch b/patches/kernel/0029-x86-idt-Unify-gate_struct-handling-for-32-64-bit-ker.patch

deleted file mode 100644 (file)

index 4cfc341..0000000
--- a/patches/kernel/0029-x86-idt-Unify-gate_struct-handling-for-32-64-bit-ker.patch
+++ /dev/null
@@ -1,262 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon, 28 Aug 2017 08:47:37 +0200
-Subject: [PATCH] x86/idt: Unify gate_struct handling for 32/64-bit kernels
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The first 32 bits of gate struct are the same for 32 and 64 bit kernels.
-
-The 32-bit version uses desc_struct and no designated data structure,
-so we need different accessors for 32 and 64 bit kernels.
-
-Aside from that, the macros which are necessary to build the 32-bit
-gate descriptor are horrible to read.
-
-Unify the gate structs and switch all code fiddling with it over.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Steven Rostedt <rostedt@goodmis.org>
-Link: http://lkml.kernel.org/r/20170828064957.861974317@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 64b163fab684e3de47aa8db6cc08ae7d2e194373)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 587719b1926757eb7531e0631d63fb93cd60d0d3)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/desc.h      | 45 ++++++++++++++-----------------
- arch/x86/include/asm/desc_defs.h | 57 ++++++++++++++++++++++++++--------------
- arch/x86/kvm/vmx.c               |  2 +-
- arch/x86/xen/enlighten_pv.c      | 12 ++++-----
- 4 files changed, 63 insertions(+), 53 deletions(-)
-
-diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
-index d0a21b12dd58..57e502a4e92f 100644
---- a/arch/x86/include/asm/desc.h
-+++ b/arch/x86/include/asm/desc.h
-@@ -83,33 +83,25 @@ static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
-       return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
- }
- 
--#ifdef CONFIG_X86_64
--
- static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
-                            unsigned dpl, unsigned ist, unsigned seg)
- {
--      gate->offset_low        = PTR_LOW(func);
-+      gate->offset_low        = (u16) func;
-+      gate->bits.p            = 1;
-+      gate->bits.dpl          = dpl;
-+      gate->bits.zero         = 0;
-+      gate->bits.type         = type;
-+      gate->offset_middle     = (u16) (func >> 16);
-+#ifdef CONFIG_X86_64
-       gate->segment           = __KERNEL_CS;
--      gate->ist               = ist;
--      gate->p                 = 1;
--      gate->dpl               = dpl;
--      gate->zero0             = 0;
--      gate->zero1             = 0;
--      gate->type              = type;
--      gate->offset_middle     = PTR_MIDDLE(func);
--      gate->offset_high       = PTR_HIGH(func);
--}
--
-+      gate->bits.ist          = ist;
-+      gate->reserved          = 0;
-+      gate->offset_high       = (u32) (func >> 32);
- #else
--static inline void pack_gate(gate_desc *gate, unsigned char type,
--                           unsigned long base, unsigned dpl, unsigned flags,
--                           unsigned short seg)
--{
--      gate->a = (seg << 16) | (base & 0xffff);
--      gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8);
--}
--
-+      gate->segment           = seg;
-+      gate->bits.ist          = 0;
- #endif
-+}
- 
- static inline int desc_empty(const void *ptr)
- {
-@@ -185,7 +177,8 @@ static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
- }
- 
- 
--static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size)
-+static inline void set_tssldt_descriptor(void *d, unsigned long addr,
-+                                       unsigned type, unsigned size)
- {
- #ifdef CONFIG_X86_64
-       struct ldttss_desc64 *desc = d;
-@@ -193,13 +186,13 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t
-       memset(desc, 0, sizeof(*desc));
- 
-       desc->limit0            = size & 0xFFFF;
--      desc->base0             = PTR_LOW(addr);
--      desc->base1             = PTR_MIDDLE(addr) & 0xFF;
-+      desc->base0             = (u16) addr;
-+      desc->base1             = (addr >> 16) & 0xFF;
-       desc->type              = type;
-       desc->p                 = 1;
-       desc->limit1            = (size >> 16) & 0xF;
--      desc->base2             = (PTR_MIDDLE(addr) >> 8) & 0xFF;
--      desc->base3             = PTR_HIGH(addr);
-+      desc->base2             = (addr >> 24) & 0xFF;
-+      desc->base3             = (u32) (addr >> 32);
- #else
-       pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
- #endif
-diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
-index 49265345d4d2..d684bee8a59a 100644
---- a/arch/x86/include/asm/desc_defs.h
-+++ b/arch/x86/include/asm/desc_defs.h
-@@ -47,20 +47,6 @@ enum {
-       GATE_TASK = 0x5,
- };
- 
--/* 16byte gate */
--struct gate_struct64 {
--      u16 offset_low;
--      u16 segment;
--      unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
--      u16 offset_middle;
--      u32 offset_high;
--      u32 zero1;
--} __attribute__((packed));
--
--#define PTR_LOW(x) ((unsigned long long)(x) & 0xFFFF)
--#define PTR_MIDDLE(x) (((unsigned long long)(x) >> 16) & 0xFFFF)
--#define PTR_HIGH(x) ((unsigned long long)(x) >> 32)
--
- enum {
-       DESC_TSS = 0x9,
-       DESC_LDT = 0x2,
-@@ -77,20 +63,51 @@ struct ldttss_desc64 {
-       u32 zero1;
- } __attribute__((packed));
- 
-+
- #ifdef CONFIG_X86_64
--typedef struct gate_struct64 gate_desc;
- typedef struct ldttss_desc64 ldt_desc;
- typedef struct ldttss_desc64 tss_desc;
--#define gate_offset(g) ((g).offset_low | ((unsigned long)(g).offset_middle << 16) | ((unsigned long)(g).offset_high << 32))
--#define gate_segment(g) ((g).segment)
- #else
--typedef struct desc_struct gate_desc;
- typedef struct desc_struct ldt_desc;
- typedef struct desc_struct tss_desc;
--#define gate_offset(g)                (((g).b & 0xffff0000) | ((g).a & 0x0000ffff))
--#define gate_segment(g)               ((g).a >> 16)
- #endif
- 
-+struct idt_bits {
-+      u16             ist     : 3,
-+                      zero    : 5,
-+                      type    : 5,
-+                      dpl     : 2,
-+                      p       : 1;
-+} __attribute__((packed));
-+
-+struct gate_struct {
-+      u16             offset_low;
-+      u16             segment;
-+      struct idt_bits bits;
-+      u16             offset_middle;
-+#ifdef CONFIG_X86_64
-+      u32             offset_high;
-+      u32             reserved;
-+#endif
-+} __attribute__((packed));
-+
-+typedef struct gate_struct gate_desc;
-+
-+static inline unsigned long gate_offset(const gate_desc *g)
-+{
-+#ifdef CONFIG_X86_64
-+      return g->offset_low | ((unsigned long)g->offset_middle << 16) |
-+              ((unsigned long) g->offset_high << 32);
-+#else
-+      return g->offset_low | ((unsigned long)g->offset_middle << 16);
-+#endif
-+}
-+
-+static inline unsigned long gate_segment(const gate_desc *g)
-+{
-+      return g->segment;
-+}
-+
- struct desc_ptr {
-       unsigned short size;
-       unsigned long address;
-diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
-index a2c95522ac99..7b447d126d17 100644
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -8838,7 +8838,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
- 
-               vector =  exit_intr_info & INTR_INFO_VECTOR_MASK;
-               desc = (gate_desc *)vmx->host_idt_base + vector;
--              entry = gate_offset(*desc);
-+              entry = gate_offset(desc);
-               asm volatile(
- #ifdef CONFIG_X86_64
-                       "mov %%" _ASM_SP ", %[sp]\n\t"
-diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
-index 6c279c8f0a0e..49ee3315b9f7 100644
---- a/arch/x86/xen/enlighten_pv.c
-+++ b/arch/x86/xen/enlighten_pv.c
-@@ -591,12 +591,12 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
- {
-       unsigned long addr;
- 
--      if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
-+      if (val->bits.type != GATE_TRAP && val->bits.type != GATE_INTERRUPT)
-               return 0;
- 
-       info->vector = vector;
- 
--      addr = gate_offset(*val);
-+      addr = gate_offset(val);
- #ifdef CONFIG_X86_64
-       /*
-        * Look for known traps using IST, and substitute them
-@@ -629,16 +629,16 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
-               ;
-       else {
-               /* Some other trap using IST? */
--              if (WARN_ON(val->ist != 0))
-+              if (WARN_ON(val->bits.ist != 0))
-                       return 0;
-       }
- #endif        /* CONFIG_X86_64 */
-       info->address = addr;
- 
--      info->cs = gate_segment(*val);
--      info->flags = val->dpl;
-+      info->cs = gate_segment(val);
-+      info->flags = val->bits.dpl;
-       /* interrupt gates clear IF */
--      if (val->type == GATE_INTERRUPT)
-+      if (val->bits.type == GATE_INTERRUPT)
-               info->flags |= 1 << 2;
- 
-       return 1;
--- 
-2.14.2
-
diff --git a/patches/kernel/0029-x86-traps-Simplify-pagefault-tracing-logic.patch b/patches/kernel/0029-x86-traps-Simplify-pagefault-tracing-logic.patch

new file mode 100644 (file)

index 0000000..801d82f
--- /dev/null
+++ b/patches/kernel/0029-x86-traps-Simplify-pagefault-tracing-logic.patch
@@ -0,0 +1,218 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 28 Aug 2017 08:47:22 +0200
+Subject: [PATCH] x86/traps: Simplify pagefault tracing logic
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Make use of the new irqvector tracing static key and remove the duplicated
+trace_do_page_fault() implementation.
+
+If irq vector tracing is disabled, then the overhead of this is a single
+NOP5, which is a reasonable tradeoff to avoid duplicated code and the
+unholy macro mess.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Link: http://lkml.kernel.org/r/20170828064956.672965407@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 11a7ffb01703c3bbb1e9b968893f4487a1b0b5a8)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 8478bb5608747fd64c9fd4a2f5422fb4af756a50)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/traps.h | 10 +--------
+ arch/x86/kernel/kvm.c        |  2 +-
+ arch/x86/mm/fault.c          | 49 ++++++++++++--------------------------------
+ arch/x86/entry/entry_32.S    |  8 --------
+ arch/x86/entry/entry_64.S    | 13 +-----------
+ 5 files changed, 16 insertions(+), 66 deletions(-)
+
+diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
+index 01fd0a7f48cd..b4f322d6c95f 100644
+--- a/arch/x86/include/asm/traps.h
++++ b/arch/x86/include/asm/traps.h
+@@ -39,7 +39,6 @@ asmlinkage void machine_check(void);
+ asmlinkage void simd_coprocessor_error(void);
+ 
+ #ifdef CONFIG_TRACING
+-asmlinkage void trace_page_fault(void);
+ #define trace_stack_segment stack_segment
+ #define trace_divide_error divide_error
+ #define trace_bounds bounds
+@@ -54,6 +53,7 @@ asmlinkage void trace_page_fault(void);
+ #define trace_alignment_check alignment_check
+ #define trace_simd_coprocessor_error simd_coprocessor_error
+ #define trace_async_page_fault async_page_fault
++#define trace_page_fault page_fault
+ #endif
+ 
+ dotraplinkage void do_divide_error(struct pt_regs *, long);
+@@ -74,14 +74,6 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
+ #endif
+ dotraplinkage void do_general_protection(struct pt_regs *, long);
+ dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
+-#ifdef CONFIG_TRACING
+-dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long);
+-#else
+-static inline void trace_do_page_fault(struct pt_regs *regs, unsigned long error)
+-{
+-      do_page_fault(regs, error);
+-}
+-#endif
+ dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);
+ dotraplinkage void do_coprocessor_error(struct pt_regs *, long);
+ dotraplinkage void do_alignment_check(struct pt_regs *, long);
+diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
+index e5e4306e4546..9e3798b00e40 100644
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -270,7 +270,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
+ 
+       switch (kvm_read_and_reset_pf_reason()) {
+       default:
+-              trace_do_page_fault(regs, error_code);
++              do_page_fault(regs, error_code);
+               break;
+       case KVM_PV_REASON_PAGE_NOT_PRESENT:
+               /* page is swapped out by the host. */
+diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
+index 955be01dd9cc..4ee9eb916826 100644
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -1253,10 +1253,6 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
+  * This routine handles page faults.  It determines the address,
+  * and the problem, and then passes it off to one of the appropriate
+  * routines.
+- *
+- * This function must have noinline because both callers
+- * {,trace_}do_page_fault() have notrace on. Having this an actual function
+- * guarantees there's a function trace entry.
+  */
+ static noinline void
+ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
+@@ -1491,27 +1487,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
+ }
+ NOKPROBE_SYMBOL(__do_page_fault);
+ 
+-dotraplinkage void notrace
+-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+-{
+-      unsigned long address = read_cr2(); /* Get the faulting address */
+-      enum ctx_state prev_state;
+-
+-      /*
+-       * We must have this function tagged with __kprobes, notrace and call
+-       * read_cr2() before calling anything else. To avoid calling any kind
+-       * of tracing machinery before we've observed the CR2 value.
+-       *
+-       * exception_{enter,exit}() contain all sorts of tracepoints.
+-       */
+-
+-      prev_state = exception_enter();
+-      __do_page_fault(regs, error_code, address);
+-      exception_exit(prev_state);
+-}
+-NOKPROBE_SYMBOL(do_page_fault);
+-
+-#ifdef CONFIG_TRACING
+ static nokprobe_inline void
+ trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
+                        unsigned long error_code)
+@@ -1522,22 +1497,24 @@ trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
+               trace_page_fault_kernel(address, regs, error_code);
+ }
+ 
++/*
++ * We must have this function blacklisted from kprobes, tagged with notrace
++ * and call read_cr2() before calling anything else. To avoid calling any
++ * kind of tracing machinery before we've observed the CR2 value.
++ *
++ * exception_{enter,exit}() contains all sorts of tracepoints.
++ */
+ dotraplinkage void notrace
+-trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
++do_page_fault(struct pt_regs *regs, unsigned long error_code)
+ {
+-      /*
+-       * The exception_enter and tracepoint processing could
+-       * trigger another page faults (user space callchain
+-       * reading) and destroy the original cr2 value, so read
+-       * the faulting address now.
+-       */
+-      unsigned long address = read_cr2();
++      unsigned long address = read_cr2(); /* Get the faulting address */
+       enum ctx_state prev_state;
+ 
+       prev_state = exception_enter();
+-      trace_page_fault_entries(address, regs, error_code);
++      if (trace_irqvectors_enabled())
++              trace_page_fault_entries(address, regs, error_code);
++
+       __do_page_fault(regs, error_code, address);
+       exception_exit(prev_state);
+ }
+-NOKPROBE_SYMBOL(trace_do_page_fault);
+-#endif /* CONFIG_TRACING */
++NOKPROBE_SYMBOL(do_page_fault);
+diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
+index 48ef7bb32c42..0092da1c056f 100644
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -891,14 +891,6 @@ BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
+ 
+ #endif /* CONFIG_HYPERV */
+ 
+-#ifdef CONFIG_TRACING
+-ENTRY(trace_page_fault)
+-      ASM_CLAC
+-      pushl   $trace_do_page_fault
+-      jmp     common_exception
+-END(trace_page_fault)
+-#endif
+-
+ ENTRY(page_fault)
+       ASM_CLAC
+       pushl   $do_page_fault
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index ca0b250eefc4..dfabcbf8e813 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -913,17 +913,6 @@ ENTRY(\sym)
+ END(\sym)
+ .endm
+ 
+-#ifdef CONFIG_TRACING
+-.macro trace_idtentry sym do_sym has_error_code:req
+-idtentry trace(\sym) trace(\do_sym) has_error_code=\has_error_code
+-idtentry \sym \do_sym has_error_code=\has_error_code
+-.endm
+-#else
+-.macro trace_idtentry sym do_sym has_error_code:req
+-idtentry \sym \do_sym has_error_code=\has_error_code
+-.endm
+-#endif
+-
+ idtentry divide_error                 do_divide_error                 has_error_code=0
+ idtentry overflow                     do_overflow                     has_error_code=0
+ idtentry bounds                               do_bounds                       has_error_code=0
+@@ -1091,7 +1080,7 @@ idtentry xen_stack_segment       do_stack_segment        has_error_code=1
+ #endif
+ 
+ idtentry general_protection   do_general_protection   has_error_code=1
+-trace_idtentry page_fault     do_page_fault           has_error_code=1
++idtentry page_fault           do_page_fault           has_error_code=1
+ 
+ #ifdef CONFIG_KVM_GUEST
+ idtentry async_page_fault     do_async_page_fault     has_error_code=1
+-- 
+2.14.2
+
diff --git a/patches/kernel/0030-x86-asm-Replace-access-to-desc_struct-a-b-fields.patch b/patches/kernel/0030-x86-asm-Replace-access-to-desc_struct-a-b-fields.patch

deleted file mode 100644 (file)

index 11a91a1..0000000
--- a/patches/kernel/0030-x86-asm-Replace-access-to-desc_struct-a-b-fields.patch
+++ /dev/null
@@ -1,93 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon, 28 Aug 2017 08:47:40 +0200
-Subject: [PATCH] x86/asm: Replace access to desc_struct:a/b fields
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The union inside of desc_struct allows access to the raw u32 parts of
-the descriptors. This raw access part is about to go away.
-
-Replace the few code parts which access those fields.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Steven Rostedt <rostedt@goodmis.org>
-Link: http://lkml.kernel.org/r/20170828064958.120214366@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 9a98e7780022aa7cd201eb8a88a4f1d607b73cde)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 8469c76c61ea9c3b86b596352d1148bace5ea706)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/xen/hypercall.h | 6 ++++--
- arch/x86/kernel/tls.c                | 2 +-
- arch/x86/xen/enlighten_pv.c          | 2 +-
- 3 files changed, 6 insertions(+), 4 deletions(-)
-
-diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
-index 11071fcd630e..9606688caa4b 100644
---- a/arch/x86/include/asm/xen/hypercall.h
-+++ b/arch/x86/include/asm/xen/hypercall.h
-@@ -552,6 +552,8 @@ static inline void
- MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
-                       struct desc_struct desc)
- {
-+      u32 *p = (u32 *) &desc;
-+
-       mcl->op = __HYPERVISOR_update_descriptor;
-       if (sizeof(maddr) == sizeof(long)) {
-               mcl->args[0] = maddr;
-@@ -559,8 +561,8 @@ MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
-       } else {
-               mcl->args[0] = maddr;
-               mcl->args[1] = maddr >> 32;
--              mcl->args[2] = desc.a;
--              mcl->args[3] = desc.b;
-+              mcl->args[2] = *p++;
-+              mcl->args[3] = *p;
-       }
- 
-       trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4);
-diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
-index dcd699baea1b..a106b9719c58 100644
---- a/arch/x86/kernel/tls.c
-+++ b/arch/x86/kernel/tls.c
-@@ -93,7 +93,7 @@ static void set_tls_desc(struct task_struct *p, int idx,
- 
-       while (n-- > 0) {
-               if (LDT_empty(info) || LDT_zero(info)) {
--                      desc->a = desc->b = 0;
-+                      memset(desc, 0, sizeof(*desc));
-               } else {
-                       fill_ldt(desc, info);
- 
-diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
-index 49ee3315b9f7..c76f5ff4d0d7 100644
---- a/arch/x86/xen/enlighten_pv.c
-+++ b/arch/x86/xen/enlighten_pv.c
-@@ -501,7 +501,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
- static inline bool desc_equal(const struct desc_struct *d1,
-                             const struct desc_struct *d2)
- {
--      return d1->a == d2->a && d1->b == d2->b;
-+      return !memcmp(d1, d2, sizeof(*d1));
- }
- 
- static void load_TLS_descriptor(struct thread_struct *t,
--- 
-2.14.2
-
diff --git a/patches/kernel/0030-x86-idt-Unify-gate_struct-handling-for-32-64-bit-ker.patch b/patches/kernel/0030-x86-idt-Unify-gate_struct-handling-for-32-64-bit-ker.patch

new file mode 100644 (file)

index 0000000..4cfc341
--- /dev/null
+++ b/patches/kernel/0030-x86-idt-Unify-gate_struct-handling-for-32-64-bit-ker.patch
@@ -0,0 +1,262 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 28 Aug 2017 08:47:37 +0200
+Subject: [PATCH] x86/idt: Unify gate_struct handling for 32/64-bit kernels
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The first 32 bits of gate struct are the same for 32 and 64 bit kernels.
+
+The 32-bit version uses desc_struct and no designated data structure,
+so we need different accessors for 32 and 64 bit kernels.
+
+Aside from that, the macros which are necessary to build the 32-bit
+gate descriptor are horrible to read.
+
+Unify the gate structs and switch all code fiddling with it over.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Link: http://lkml.kernel.org/r/20170828064957.861974317@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 64b163fab684e3de47aa8db6cc08ae7d2e194373)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 587719b1926757eb7531e0631d63fb93cd60d0d3)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/desc.h      | 45 ++++++++++++++-----------------
+ arch/x86/include/asm/desc_defs.h | 57 ++++++++++++++++++++++++++--------------
+ arch/x86/kvm/vmx.c               |  2 +-
+ arch/x86/xen/enlighten_pv.c      | 12 ++++-----
+ 4 files changed, 63 insertions(+), 53 deletions(-)
+
+diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
+index d0a21b12dd58..57e502a4e92f 100644
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -83,33 +83,25 @@ static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
+       return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
+ }
+ 
+-#ifdef CONFIG_X86_64
+-
+ static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
+                            unsigned dpl, unsigned ist, unsigned seg)
+ {
+-      gate->offset_low        = PTR_LOW(func);
++      gate->offset_low        = (u16) func;
++      gate->bits.p            = 1;
++      gate->bits.dpl          = dpl;
++      gate->bits.zero         = 0;
++      gate->bits.type         = type;
++      gate->offset_middle     = (u16) (func >> 16);
++#ifdef CONFIG_X86_64
+       gate->segment           = __KERNEL_CS;
+-      gate->ist               = ist;
+-      gate->p                 = 1;
+-      gate->dpl               = dpl;
+-      gate->zero0             = 0;
+-      gate->zero1             = 0;
+-      gate->type              = type;
+-      gate->offset_middle     = PTR_MIDDLE(func);
+-      gate->offset_high       = PTR_HIGH(func);
+-}
+-
++      gate->bits.ist          = ist;
++      gate->reserved          = 0;
++      gate->offset_high       = (u32) (func >> 32);
+ #else
+-static inline void pack_gate(gate_desc *gate, unsigned char type,
+-                           unsigned long base, unsigned dpl, unsigned flags,
+-                           unsigned short seg)
+-{
+-      gate->a = (seg << 16) | (base & 0xffff);
+-      gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8);
+-}
+-
++      gate->segment           = seg;
++      gate->bits.ist          = 0;
+ #endif
++}
+ 
+ static inline int desc_empty(const void *ptr)
+ {
+@@ -185,7 +177,8 @@ static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
+ }
+ 
+ 
+-static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size)
++static inline void set_tssldt_descriptor(void *d, unsigned long addr,
++                                       unsigned type, unsigned size)
+ {
+ #ifdef CONFIG_X86_64
+       struct ldttss_desc64 *desc = d;
+@@ -193,13 +186,13 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t
+       memset(desc, 0, sizeof(*desc));
+ 
+       desc->limit0            = size & 0xFFFF;
+-      desc->base0             = PTR_LOW(addr);
+-      desc->base1             = PTR_MIDDLE(addr) & 0xFF;
++      desc->base0             = (u16) addr;
++      desc->base1             = (addr >> 16) & 0xFF;
+       desc->type              = type;
+       desc->p                 = 1;
+       desc->limit1            = (size >> 16) & 0xF;
+-      desc->base2             = (PTR_MIDDLE(addr) >> 8) & 0xFF;
+-      desc->base3             = PTR_HIGH(addr);
++      desc->base2             = (addr >> 24) & 0xFF;
++      desc->base3             = (u32) (addr >> 32);
+ #else
+       pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
+ #endif
+diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
+index 49265345d4d2..d684bee8a59a 100644
+--- a/arch/x86/include/asm/desc_defs.h
++++ b/arch/x86/include/asm/desc_defs.h
+@@ -47,20 +47,6 @@ enum {
+       GATE_TASK = 0x5,
+ };
+ 
+-/* 16byte gate */
+-struct gate_struct64 {
+-      u16 offset_low;
+-      u16 segment;
+-      unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
+-      u16 offset_middle;
+-      u32 offset_high;
+-      u32 zero1;
+-} __attribute__((packed));
+-
+-#define PTR_LOW(x) ((unsigned long long)(x) & 0xFFFF)
+-#define PTR_MIDDLE(x) (((unsigned long long)(x) >> 16) & 0xFFFF)
+-#define PTR_HIGH(x) ((unsigned long long)(x) >> 32)
+-
+ enum {
+       DESC_TSS = 0x9,
+       DESC_LDT = 0x2,
+@@ -77,20 +63,51 @@ struct ldttss_desc64 {
+       u32 zero1;
+ } __attribute__((packed));
+ 
++
+ #ifdef CONFIG_X86_64
+-typedef struct gate_struct64 gate_desc;
+ typedef struct ldttss_desc64 ldt_desc;
+ typedef struct ldttss_desc64 tss_desc;
+-#define gate_offset(g) ((g).offset_low | ((unsigned long)(g).offset_middle << 16) | ((unsigned long)(g).offset_high << 32))
+-#define gate_segment(g) ((g).segment)
+ #else
+-typedef struct desc_struct gate_desc;
+ typedef struct desc_struct ldt_desc;
+ typedef struct desc_struct tss_desc;
+-#define gate_offset(g)                (((g).b & 0xffff0000) | ((g).a & 0x0000ffff))
+-#define gate_segment(g)               ((g).a >> 16)
+ #endif
+ 
++struct idt_bits {
++      u16             ist     : 3,
++                      zero    : 5,
++                      type    : 5,
++                      dpl     : 2,
++                      p       : 1;
++} __attribute__((packed));
++
++struct gate_struct {
++      u16             offset_low;
++      u16             segment;
++      struct idt_bits bits;
++      u16             offset_middle;
++#ifdef CONFIG_X86_64
++      u32             offset_high;
++      u32             reserved;
++#endif
++} __attribute__((packed));
++
++typedef struct gate_struct gate_desc;
++
++static inline unsigned long gate_offset(const gate_desc *g)
++{
++#ifdef CONFIG_X86_64
++      return g->offset_low | ((unsigned long)g->offset_middle << 16) |
++              ((unsigned long) g->offset_high << 32);
++#else
++      return g->offset_low | ((unsigned long)g->offset_middle << 16);
++#endif
++}
++
++static inline unsigned long gate_segment(const gate_desc *g)
++{
++      return g->segment;
++}
++
+ struct desc_ptr {
+       unsigned short size;
+       unsigned long address;
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index a2c95522ac99..7b447d126d17 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -8838,7 +8838,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
+ 
+               vector =  exit_intr_info & INTR_INFO_VECTOR_MASK;
+               desc = (gate_desc *)vmx->host_idt_base + vector;
+-              entry = gate_offset(*desc);
++              entry = gate_offset(desc);
+               asm volatile(
+ #ifdef CONFIG_X86_64
+                       "mov %%" _ASM_SP ", %[sp]\n\t"
+diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
+index 6c279c8f0a0e..49ee3315b9f7 100644
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -591,12 +591,12 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
+ {
+       unsigned long addr;
+ 
+-      if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
++      if (val->bits.type != GATE_TRAP && val->bits.type != GATE_INTERRUPT)
+               return 0;
+ 
+       info->vector = vector;
+ 
+-      addr = gate_offset(*val);
++      addr = gate_offset(val);
+ #ifdef CONFIG_X86_64
+       /*
+        * Look for known traps using IST, and substitute them
+@@ -629,16 +629,16 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
+               ;
+       else {
+               /* Some other trap using IST? */
+-              if (WARN_ON(val->ist != 0))
++              if (WARN_ON(val->bits.ist != 0))
+                       return 0;
+       }
+ #endif        /* CONFIG_X86_64 */
+       info->address = addr;
+ 
+-      info->cs = gate_segment(*val);
+-      info->flags = val->dpl;
++      info->cs = gate_segment(val);
++      info->flags = val->bits.dpl;
+       /* interrupt gates clear IF */
+-      if (val->type == GATE_INTERRUPT)
++      if (val->bits.type == GATE_INTERRUPT)
+               info->flags |= 1 << 2;
+ 
+       return 1;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0031-x86-asm-Replace-access-to-desc_struct-a-b-fields.patch b/patches/kernel/0031-x86-asm-Replace-access-to-desc_struct-a-b-fields.patch

new file mode 100644 (file)

index 0000000..11a91a1
--- /dev/null
+++ b/patches/kernel/0031-x86-asm-Replace-access-to-desc_struct-a-b-fields.patch
@@ -0,0 +1,93 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 28 Aug 2017 08:47:40 +0200
+Subject: [PATCH] x86/asm: Replace access to desc_struct:a/b fields
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The union inside of desc_struct allows access to the raw u32 parts of the
+descriptors. This raw access part is about to go away.
+
+Replace the few code parts which access those fields.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Link: http://lkml.kernel.org/r/20170828064958.120214366@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 9a98e7780022aa7cd201eb8a88a4f1d607b73cde)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 8469c76c61ea9c3b86b596352d1148bace5ea706)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/xen/hypercall.h | 6 ++++--
+ arch/x86/kernel/tls.c                | 2 +-
+ arch/x86/xen/enlighten_pv.c          | 2 +-
+ 3 files changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
+index 11071fcd630e..9606688caa4b 100644
+--- a/arch/x86/include/asm/xen/hypercall.h
++++ b/arch/x86/include/asm/xen/hypercall.h
+@@ -552,6 +552,8 @@ static inline void
+ MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
+                       struct desc_struct desc)
+ {
++      u32 *p = (u32 *) &desc;
++
+       mcl->op = __HYPERVISOR_update_descriptor;
+       if (sizeof(maddr) == sizeof(long)) {
+               mcl->args[0] = maddr;
+@@ -559,8 +561,8 @@ MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
+       } else {
+               mcl->args[0] = maddr;
+               mcl->args[1] = maddr >> 32;
+-              mcl->args[2] = desc.a;
+-              mcl->args[3] = desc.b;
++              mcl->args[2] = *p++;
++              mcl->args[3] = *p;
+       }
+ 
+       trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4);
+diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
+index dcd699baea1b..a106b9719c58 100644
+--- a/arch/x86/kernel/tls.c
++++ b/arch/x86/kernel/tls.c
+@@ -93,7 +93,7 @@ static void set_tls_desc(struct task_struct *p, int idx,
+ 
+       while (n-- > 0) {
+               if (LDT_empty(info) || LDT_zero(info)) {
+-                      desc->a = desc->b = 0;
++                      memset(desc, 0, sizeof(*desc));
+               } else {
+                       fill_ldt(desc, info);
+ 
+diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
+index 49ee3315b9f7..c76f5ff4d0d7 100644
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -501,7 +501,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
+ static inline bool desc_equal(const struct desc_struct *d1,
+                             const struct desc_struct *d2)
+ {
+-      return d1->a == d2->a && d1->b == d2->b;
++      return !memcmp(d1, d2, sizeof(*d1));
+ }
+ 
+ static void load_TLS_descriptor(struct thread_struct *t,
+-- 
+2.14.2
+
diff --git a/patches/kernel/0031-x86-xen-Get-rid-of-paravirt-op-adjust_exception_fram.patch b/patches/kernel/0031-x86-xen-Get-rid-of-paravirt-op-adjust_exception_fram.patch

deleted file mode 100644 (file)

index c58c59c..0000000
--- a/patches/kernel/0031-x86-xen-Get-rid-of-paravirt-op-adjust_exception_fram.patch
+++ /dev/null
@@ -1,436 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Juergen Gross <jgross@suse.com>
-Date: Thu, 31 Aug 2017 19:42:49 +0200
-Subject: [PATCH] x86/xen: Get rid of paravirt op adjust_exception_frame
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-When running as Xen pv-guest the exception frame on the stack contains
-%r11 and %rcx additional to the other data pushed by the processor.
-
-Instead of having a paravirt op being called for each exception type
-prepend the Xen specific code to each exception entry. When running as
-Xen pv-guest just use the exception entry with prepended instructions,
-otherwise use the entry without the Xen specific code.
-
-[ tglx: Merged through tip to avoid ugly merge conflict ]
-
-Signed-off-by: Juergen Gross <jgross@suse.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: xen-devel@lists.xenproject.org
-Cc: boris.ostrovsky@oracle.com
-Cc: luto@amacapital.net
-Link: http://lkml.kernel.org/r/20170831174249.26853-1-jg@pfupf.net
-(backported from commit 5878d5d6fdef6447d73b0acc121ba445bef37f53)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 9a6fb927deb3ebbe831741ca82081714637181a7)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/paravirt.h       |  5 --
- arch/x86/include/asm/paravirt_types.h |  3 --
- arch/x86/include/asm/proto.h          |  3 ++
- arch/x86/include/asm/traps.h          | 28 ++++++++--
- arch/x86/xen/xen-ops.h                |  1 -
- arch/x86/kernel/asm-offsets_64.c      |  1 -
- arch/x86/kernel/paravirt.c            |  3 --
- arch/x86/xen/enlighten_pv.c           | 98 +++++++++++++++++++++++------------
- arch/x86/xen/irq.c                    |  3 --
- arch/x86/entry/entry_64.S             | 23 ++------
- arch/x86/entry/entry_64_compat.S      |  1 -
- arch/x86/xen/xen-asm_64.S             | 41 +++++++++++++--
- 12 files changed, 133 insertions(+), 77 deletions(-)
-
-diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
-index 9ccac1926587..c25dd22f7c70 100644
---- a/arch/x86/include/asm/paravirt.h
-+++ b/arch/x86/include/asm/paravirt.h
-@@ -960,11 +960,6 @@ extern void default_banner(void);
- #define GET_CR2_INTO_RAX                              \
-       call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
- 
--#define PARAVIRT_ADJUST_EXCEPTION_FRAME                                       \
--      PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
--                CLBR_NONE,                                            \
--                call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
--
- #define USERGS_SYSRET64                                                       \
-       PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
-                 CLBR_NONE,                                            \
-diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
-index 9ffc36bfe4cd..6b64fc6367f2 100644
---- a/arch/x86/include/asm/paravirt_types.h
-+++ b/arch/x86/include/asm/paravirt_types.h
-@@ -196,9 +196,6 @@ struct pv_irq_ops {
-       void (*safe_halt)(void);
-       void (*halt)(void);
- 
--#ifdef CONFIG_X86_64
--      void (*adjust_exception_frame)(void);
--#endif
- } __no_randomize_layout;
- 
- struct pv_mmu_ops {
-diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
-index 8d3964fc5f91..b408b1886195 100644
---- a/arch/x86/include/asm/proto.h
-+++ b/arch/x86/include/asm/proto.h
-@@ -24,6 +24,9 @@ void entry_SYSENTER_compat(void);
- void __end_entry_SYSENTER_compat(void);
- void entry_SYSCALL_compat(void);
- void entry_INT80_compat(void);
-+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
-+void xen_entry_INT80_compat(void);
-+#endif
- #endif
- 
- void x86_configure_nx(void);
-diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
-index b4f322d6c95f..feb89dbe359d 100644
---- a/arch/x86/include/asm/traps.h
-+++ b/arch/x86/include/asm/traps.h
-@@ -13,9 +13,6 @@ asmlinkage void divide_error(void);
- asmlinkage void debug(void);
- asmlinkage void nmi(void);
- asmlinkage void int3(void);
--asmlinkage void xen_debug(void);
--asmlinkage void xen_int3(void);
--asmlinkage void xen_stack_segment(void);
- asmlinkage void overflow(void);
- asmlinkage void bounds(void);
- asmlinkage void invalid_op(void);
-@@ -56,6 +53,31 @@ asmlinkage void simd_coprocessor_error(void);
- #define trace_page_fault page_fault
- #endif
- 
-+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
-+asmlinkage void xen_divide_error(void);
-+asmlinkage void xen_xendebug(void);
-+asmlinkage void xen_xenint3(void);
-+asmlinkage void xen_nmi(void);
-+asmlinkage void xen_overflow(void);
-+asmlinkage void xen_bounds(void);
-+asmlinkage void xen_invalid_op(void);
-+asmlinkage void xen_device_not_available(void);
-+asmlinkage void xen_double_fault(void);
-+asmlinkage void xen_coprocessor_segment_overrun(void);
-+asmlinkage void xen_invalid_TSS(void);
-+asmlinkage void xen_segment_not_present(void);
-+asmlinkage void xen_stack_segment(void);
-+asmlinkage void xen_general_protection(void);
-+asmlinkage void xen_page_fault(void);
-+asmlinkage void xen_spurious_interrupt_bug(void);
-+asmlinkage void xen_coprocessor_error(void);
-+asmlinkage void xen_alignment_check(void);
-+#ifdef CONFIG_X86_MCE
-+asmlinkage void xen_machine_check(void);
-+#endif /* CONFIG_X86_MCE */
-+asmlinkage void xen_simd_coprocessor_error(void);
-+#endif
-+
- dotraplinkage void do_divide_error(struct pt_regs *, long);
- dotraplinkage void do_debug(struct pt_regs *, long);
- dotraplinkage void do_nmi(struct pt_regs *, long);
-diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
-index 70301ac0d414..c8a6d224f7ed 100644
---- a/arch/x86/xen/xen-ops.h
-+++ b/arch/x86/xen/xen-ops.h
-@@ -138,7 +138,6 @@ __visible void xen_restore_fl_direct(unsigned long);
- __visible void xen_iret(void);
- __visible void xen_sysret32(void);
- __visible void xen_sysret64(void);
--__visible void xen_adjust_exception_frame(void);
- 
- extern int xen_panic_handler_init(void);
- 
-diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
-index 99332f550c48..cf42206926af 100644
---- a/arch/x86/kernel/asm-offsets_64.c
-+++ b/arch/x86/kernel/asm-offsets_64.c
-@@ -20,7 +20,6 @@ static char syscalls_ia32[] = {
- int main(void)
- {
- #ifdef CONFIG_PARAVIRT
--      OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
-       OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
-       OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
-       BLANK();
-diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
-index bc0a849589bb..a14df9eecfed 100644
---- a/arch/x86/kernel/paravirt.c
-+++ b/arch/x86/kernel/paravirt.c
-@@ -319,9 +319,6 @@ __visible struct pv_irq_ops pv_irq_ops = {
-       .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
-       .safe_halt = native_safe_halt,
-       .halt = native_halt,
--#ifdef CONFIG_X86_64
--      .adjust_exception_frame = paravirt_nop,
--#endif
- };
- 
- __visible struct pv_cpu_ops pv_cpu_ops = {
-diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
-index c76f5ff4d0d7..ae2a2e2d6362 100644
---- a/arch/x86/xen/enlighten_pv.c
-+++ b/arch/x86/xen/enlighten_pv.c
-@@ -586,6 +586,70 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
-       preempt_enable();
- }
- 
-+#ifdef CONFIG_X86_64
-+struct trap_array_entry {
-+      void (*orig)(void);
-+      void (*xen)(void);
-+      bool ist_okay;
-+};
-+
-+static struct trap_array_entry trap_array[] = {
-+      { debug,                       xen_xendebug,                    true },
-+      { int3,                        xen_xenint3,                     true },
-+      { double_fault,                xen_double_fault,                true },
-+#ifdef CONFIG_X86_MCE
-+      { machine_check,               xen_machine_check,               true },
-+#endif
-+      { nmi,                         xen_nmi,                         true },
-+      { overflow,                    xen_overflow,                    false },
-+#ifdef CONFIG_IA32_EMULATION
-+      { entry_INT80_compat,          xen_entry_INT80_compat,          false },
-+#endif
-+      { page_fault,                  xen_page_fault,                  false },
-+      { divide_error,                xen_divide_error,                false },
-+      { bounds,                      xen_bounds,                      false },
-+      { invalid_op,                  xen_invalid_op,                  false },
-+      { device_not_available,        xen_device_not_available,        false },
-+      { coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false },
-+      { invalid_TSS,                 xen_invalid_TSS,                 false },
-+      { segment_not_present,         xen_segment_not_present,         false },
-+      { stack_segment,               xen_stack_segment,               false },
-+      { general_protection,          xen_general_protection,          false },
-+      { spurious_interrupt_bug,      xen_spurious_interrupt_bug,      false },
-+      { coprocessor_error,           xen_coprocessor_error,           false },
-+      { alignment_check,             xen_alignment_check,             false },
-+      { simd_coprocessor_error,      xen_simd_coprocessor_error,      false },
-+};
-+
-+static bool get_trap_addr(void **addr, unsigned int ist)
-+{
-+      unsigned int nr;
-+      bool ist_okay = false;
-+
-+      /*
-+       * Replace trap handler addresses by Xen specific ones.
-+       * Check for known traps using IST and whitelist them.
-+       * The debugger ones are the only ones we care about.
-+       * Xen will handle faults like double_fault, * so we should never see
-+       * them.  Warn if there's an unexpected IST-using fault handler.
-+       */
-+      for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) {
-+              struct trap_array_entry *entry = trap_array + nr;
-+
-+              if (*addr == entry->orig) {
-+                      *addr = entry->xen;
-+                      ist_okay = entry->ist_okay;
-+                      break;
-+              }
-+      }
-+
-+      if (WARN_ON(ist != 0 && !ist_okay))
-+              return false;
-+
-+      return true;
-+}
-+#endif
-+
- static int cvt_gate_to_trap(int vector, const gate_desc *val,
-                           struct trap_info *info)
- {
-@@ -598,40 +662,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
- 
-       addr = gate_offset(val);
- #ifdef CONFIG_X86_64
--      /*
--       * Look for known traps using IST, and substitute them
--       * appropriately.  The debugger ones are the only ones we care
--       * about.  Xen will handle faults like double_fault,
--       * so we should never see them.  Warn if
--       * there's an unexpected IST-using fault handler.
--       */
--      if (addr == (unsigned long)debug)
--              addr = (unsigned long)xen_debug;
--      else if (addr == (unsigned long)int3)
--              addr = (unsigned long)xen_int3;
--      else if (addr == (unsigned long)stack_segment)
--              addr = (unsigned long)xen_stack_segment;
--      else if (addr == (unsigned long)double_fault) {
--              /* Don't need to handle these */
-+      if (!get_trap_addr((void **)&addr, val->bits.ist))
-               return 0;
--#ifdef CONFIG_X86_MCE
--      } else if (addr == (unsigned long)machine_check) {
--              /*
--               * when xen hypervisor inject vMCE to guest,
--               * use native mce handler to handle it
--               */
--              ;
--#endif
--      } else if (addr == (unsigned long)nmi)
--              /*
--               * Use the native version as well.
--               */
--              ;
--      else {
--              /* Some other trap using IST? */
--              if (WARN_ON(val->bits.ist != 0))
--                      return 0;
--      }
- #endif        /* CONFIG_X86_64 */
-       info->address = addr;
- 
-diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
-index 33e92955e09d..d4eff5676cfa 100644
---- a/arch/x86/xen/irq.c
-+++ b/arch/x86/xen/irq.c
-@@ -123,9 +123,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
- 
-       .safe_halt = xen_safe_halt,
-       .halt = xen_halt,
--#ifdef CONFIG_X86_64
--      .adjust_exception_frame = xen_adjust_exception_frame,
--#endif
- };
- 
- void __init xen_init_irq_ops(void)
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index dfabcbf8e813..c12260ef3e4b 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -829,7 +829,6 @@ ENTRY(\sym)
-       .endif
- 
-       ASM_CLAC
--      PARAVIRT_ADJUST_EXCEPTION_FRAME
- 
-       .ifeq \has_error_code
-       pushq   $-1                             /* ORIG_RAX: no syscall to restart */
-@@ -975,7 +974,7 @@ ENTRY(do_softirq_own_stack)
- ENDPROC(do_softirq_own_stack)
- 
- #ifdef CONFIG_XEN
--idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
-+idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
- 
- /*
-  * A note on the "critical region" in our callback handler.
-@@ -1042,8 +1041,6 @@ ENTRY(xen_failsafe_callback)
-       movq    8(%rsp), %r11
-       addq    $0x30, %rsp
-       pushq   $0                              /* RIP */
--      pushq   %r11
--      pushq   %rcx
-       UNWIND_HINT_IRET_REGS offset=8
-       jmp     general_protection
- 1:    /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
-@@ -1074,9 +1071,8 @@ idtentry int3                    do_int3                 has_error_code=0        paranoid=1 shift_ist=DEBUG_STACK
- idtentry stack_segment                do_stack_segment        has_error_code=1
- 
- #ifdef CONFIG_XEN
--idtentry xen_debug            do_debug                has_error_code=0
--idtentry xen_int3             do_int3                 has_error_code=0
--idtentry xen_stack_segment    do_stack_segment        has_error_code=1
-+idtentry xendebug             do_debug                has_error_code=0
-+idtentry xenint3              do_int3                 has_error_code=0
- #endif
- 
- idtentry general_protection   do_general_protection   has_error_code=1
-@@ -1240,20 +1236,9 @@ ENTRY(error_exit)
- END(error_exit)
- 
- /* Runs on exception stack */
-+/* XXX: broken on Xen PV */
- ENTRY(nmi)
-       UNWIND_HINT_IRET_REGS
--      /*
--       * Fix up the exception frame if we're on Xen.
--       * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
--       * one value to the stack on native, so it may clobber the rdx
--       * scratch slot, but it won't clobber any of the important
--       * slots past it.
--       *
--       * Xen is a different story, because the Xen frame itself overlaps
--       * the "NMI executing" variable.
--       */
--      PARAVIRT_ADJUST_EXCEPTION_FRAME
--
-       /*
-        * We allow breakpoints in NMIs. If a breakpoint occurs, then
-        * the iretq it performs will take us out of NMI context.
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
-index 5314d7b8e5ad..d8468ba24be0 100644
---- a/arch/x86/entry/entry_64_compat.S
-+++ b/arch/x86/entry/entry_64_compat.S
-@@ -293,7 +293,6 @@ ENTRY(entry_INT80_compat)
-       /*
-        * Interrupts are off on entry.
-        */
--      PARAVIRT_ADJUST_EXCEPTION_FRAME
-       ASM_CLAC                        /* Do this early to minimize exposure */
-       SWAPGS
- 
-diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
-index 3a3b6a211584..dae2cc33afb5 100644
---- a/arch/x86/xen/xen-asm_64.S
-+++ b/arch/x86/xen/xen-asm_64.S
-@@ -16,11 +16,42 @@
- 
- #include <linux/linkage.h>
- 
--ENTRY(xen_adjust_exception_frame)
--      mov 8+0(%rsp), %rcx
--      mov 8+8(%rsp), %r11
--      ret $16
--ENDPROC(xen_adjust_exception_frame)
-+.macro xen_pv_trap name
-+ENTRY(xen_\name)
-+      pop %rcx
-+      pop %r11
-+      jmp  \name
-+END(xen_\name)
-+.endm
-+
-+xen_pv_trap divide_error
-+xen_pv_trap debug
-+xen_pv_trap xendebug
-+xen_pv_trap int3
-+xen_pv_trap xenint3
-+xen_pv_trap nmi
-+xen_pv_trap overflow
-+xen_pv_trap bounds
-+xen_pv_trap invalid_op
-+xen_pv_trap device_not_available
-+xen_pv_trap double_fault
-+xen_pv_trap coprocessor_segment_overrun
-+xen_pv_trap invalid_TSS
-+xen_pv_trap segment_not_present
-+xen_pv_trap stack_segment
-+xen_pv_trap general_protection
-+xen_pv_trap page_fault
-+xen_pv_trap spurious_interrupt_bug
-+xen_pv_trap coprocessor_error
-+xen_pv_trap alignment_check
-+#ifdef CONFIG_X86_MCE
-+xen_pv_trap machine_check
-+#endif /* CONFIG_X86_MCE */
-+xen_pv_trap simd_coprocessor_error
-+#ifdef CONFIG_IA32_EMULATION
-+xen_pv_trap entry_INT80_compat
-+#endif
-+xen_pv_trap hypervisor_callback
- 
- hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
- /*
--- 
-2.14.2
-
diff --git a/patches/kernel/0032-x86-paravirt-Remove-no-longer-used-paravirt-function.patch b/patches/kernel/0032-x86-paravirt-Remove-no-longer-used-paravirt-function.patch

deleted file mode 100644 (file)

index 516eb30..0000000
--- a/patches/kernel/0032-x86-paravirt-Remove-no-longer-used-paravirt-function.patch
+++ /dev/null
@@ -1,390 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Juergen Gross <jgross@suse.com>
-Date: Mon, 4 Sep 2017 12:25:27 +0200
-Subject: [PATCH] x86/paravirt: Remove no longer used paravirt functions
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-With removal of lguest some of the paravirt functions are no longer
-needed:
-
-       ->read_cr4()
-       ->store_idt()
-       ->set_pmd_at()
-       ->set_pud_at()
-       ->pte_update()
-
-Remove them.
-
-Signed-off-by: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: akataria@vmware.com
-Cc: boris.ostrovsky@oracle.com
-Cc: chrisw@sous-sol.org
-Cc: jeremy@goop.org
-Cc: rusty@rustcorp.com.au
-Cc: virtualization@lists.linux-foundation.org
-Cc: xen-devel@lists.xenproject.org
-Link: http://lkml.kernel.org/r/20170904102527.25409-1-jgross@suse.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 87930019c713873a1c3b9bd55dde46e81f70c8f1)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit edf3ab0080a6e79a300753e66929b0b7499eaec5)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/desc.h           |  3 +--
- arch/x86/include/asm/paravirt.h       | 37 -----------------------------------
- arch/x86/include/asm/paravirt_types.h |  9 ---------
- arch/x86/include/asm/pgtable.h        | 27 ++++---------------------
- arch/x86/include/asm/special_insns.h  | 10 +++++-----
- arch/x86/kernel/paravirt.c            |  5 -----
- arch/x86/kvm/vmx.c                    |  2 +-
- arch/x86/mm/pgtable.c                 |  7 +------
- arch/x86/xen/enlighten_pv.c           |  2 --
- arch/x86/xen/mmu_pv.c                 |  2 --
- 10 files changed, 12 insertions(+), 92 deletions(-)
-
-diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
-index 57e502a4e92f..f995e5a09136 100644
---- a/arch/x86/include/asm/desc.h
-+++ b/arch/x86/include/asm/desc.h
-@@ -120,7 +120,6 @@ static inline int desc_empty(const void *ptr)
- #define load_ldt(ldt)                         asm volatile("lldt %0"::"m" (ldt))
- 
- #define store_gdt(dtr)                                native_store_gdt(dtr)
--#define store_idt(dtr)                                native_store_idt(dtr)
- #define store_tr(tr)                          (tr = native_store_tr())
- 
- #define load_TLS(t, cpu)                      native_load_tls(t, cpu)
-@@ -241,7 +240,7 @@ static inline void native_store_gdt(struct desc_ptr *dtr)
-       asm volatile("sgdt %0":"=m" (*dtr));
- }
- 
--static inline void native_store_idt(struct desc_ptr *dtr)
-+static inline void store_idt(struct desc_ptr *dtr)
- {
-       asm volatile("sidt %0":"=m" (*dtr));
- }
-diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
-index c25dd22f7c70..12deec722cf0 100644
---- a/arch/x86/include/asm/paravirt.h
-+++ b/arch/x86/include/asm/paravirt.h
-@@ -71,11 +71,6 @@ static inline void write_cr3(unsigned long x)
-       PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
- }
- 
--static inline unsigned long __read_cr4(void)
--{
--      return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
--}
--
- static inline void __write_cr4(unsigned long x)
- {
-       PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
-@@ -228,10 +223,6 @@ static inline void set_ldt(const void *addr, unsigned entries)
- {
-       PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
- }
--static inline void store_idt(struct desc_ptr *dtr)
--{
--      PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
--}
- static inline unsigned long paravirt_store_tr(void)
- {
-       return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
-@@ -365,12 +356,6 @@ static inline void paravirt_release_p4d(unsigned long pfn)
-       PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn);
- }
- 
--static inline void pte_update(struct mm_struct *mm, unsigned long addr,
--                            pte_t *ptep)
--{
--      PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
--}
--
- static inline pte_t __pte(pteval_t val)
- {
-       pteval_t ret;
-@@ -472,28 +457,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-               PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
- }
- 
--static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
--                            pmd_t *pmdp, pmd_t pmd)
--{
--      if (sizeof(pmdval_t) > sizeof(long))
--              /* 5 arg words */
--              pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd);
--      else
--              PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
--                          native_pmd_val(pmd));
--}
--
--static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
--                            pud_t *pudp, pud_t pud)
--{
--      if (sizeof(pudval_t) > sizeof(long))
--              /* 5 arg words */
--              pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
--      else
--              PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
--                          native_pud_val(pud));
--}
--
- static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
- {
-       pmdval_t val = native_pmd_val(pmd);
-diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
-index 6b64fc6367f2..42873edd9f9d 100644
---- a/arch/x86/include/asm/paravirt_types.h
-+++ b/arch/x86/include/asm/paravirt_types.h
-@@ -107,7 +107,6 @@ struct pv_cpu_ops {
-       unsigned long (*read_cr0)(void);
-       void (*write_cr0)(unsigned long);
- 
--      unsigned long (*read_cr4)(void);
-       void (*write_cr4)(unsigned long);
- 
- #ifdef CONFIG_X86_64
-@@ -119,8 +118,6 @@ struct pv_cpu_ops {
-       void (*load_tr_desc)(void);
-       void (*load_gdt)(const struct desc_ptr *);
-       void (*load_idt)(const struct desc_ptr *);
--      /* store_gdt has been removed. */
--      void (*store_idt)(struct desc_ptr *);
-       void (*set_ldt)(const void *desc, unsigned entries);
-       unsigned long (*store_tr)(void);
-       void (*load_tls)(struct thread_struct *t, unsigned int cpu);
-@@ -245,12 +242,6 @@ struct pv_mmu_ops {
-       void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
-                          pte_t *ptep, pte_t pteval);
-       void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
--      void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
--                         pmd_t *pmdp, pmd_t pmdval);
--      void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
--                         pud_t *pudp, pud_t pudval);
--      void (*pte_update)(struct mm_struct *mm, unsigned long addr,
--                         pte_t *ptep);
- 
-       pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
-                                       pte_t *ptep);
-diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
-index 77037b6f1caa..bb8e9ea7deb4 100644
---- a/arch/x86/include/asm/pgtable.h
-+++ b/arch/x86/include/asm/pgtable.h
-@@ -43,8 +43,6 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
- #else  /* !CONFIG_PARAVIRT */
- #define set_pte(ptep, pte)            native_set_pte(ptep, pte)
- #define set_pte_at(mm, addr, ptep, pte)       native_set_pte_at(mm, addr, ptep, pte)
--#define set_pmd_at(mm, addr, pmdp, pmd)       native_set_pmd_at(mm, addr, pmdp, pmd)
--#define set_pud_at(mm, addr, pudp, pud)       native_set_pud_at(mm, addr, pudp, pud)
- 
- #define set_pte_atomic(ptep, pte)                                     \
-       native_set_pte_atomic(ptep, pte)
-@@ -75,8 +73,6 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
- #define pte_clear(mm, addr, ptep)     native_pte_clear(mm, addr, ptep)
- #define pmd_clear(pmd)                        native_pmd_clear(pmd)
- 
--#define pte_update(mm, addr, ptep)              do { } while (0)
--
- #define pgd_val(x)    native_pgd_val(x)
- #define __pgd(x)      native_make_pgd(x)
- 
-@@ -965,31 +961,18 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
-       native_set_pte(ptep, pte);
- }
- 
--static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
--                                   pmd_t *pmdp , pmd_t pmd)
-+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
-+                            pmd_t *pmdp, pmd_t pmd)
- {
-       native_set_pmd(pmdp, pmd);
- }
- 
--static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
--                                   pud_t *pudp, pud_t pud)
-+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
-+                            pud_t *pudp, pud_t pud)
- {
-       native_set_pud(pudp, pud);
- }
- 
--#ifndef CONFIG_PARAVIRT
--/*
-- * Rules for using pte_update - it must be called after any PTE update which
-- * has not been done using the set_pte / clear_pte interfaces.  It is used by
-- * shadow mode hypervisors to resynchronize the shadow page tables.  Kernel PTE
-- * updates should either be sets, clears, or set_pte_atomic for P->P
-- * transitions, which means this hook should only be called for user PTEs.
-- * This hook implies a P->P protection or access change has taken place, which
-- * requires a subsequent TLB flush.
-- */
--#define pte_update(mm, addr, ptep)            do { } while (0)
--#endif
--
- /*
-  * We only update the dirty/accessed state if we set
-  * the dirty bit by hand in the kernel, since the hardware
-@@ -1017,7 +1000,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
-                                      pte_t *ptep)
- {
-       pte_t pte = native_ptep_get_and_clear(ptep);
--      pte_update(mm, addr, ptep);
-       return pte;
- }
- 
-@@ -1044,7 +1026,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
-                                     unsigned long addr, pte_t *ptep)
- {
-       clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
--      pte_update(mm, addr, ptep);
- }
- 
- #define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
-diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
-index 9efaabf5b54b..a24dfcf79f4a 100644
---- a/arch/x86/include/asm/special_insns.h
-+++ b/arch/x86/include/asm/special_insns.h
-@@ -135,6 +135,11 @@ static inline void native_wbinvd(void)
- 
- extern asmlinkage void native_load_gs_index(unsigned);
- 
-+static inline unsigned long __read_cr4(void)
-+{
-+      return native_read_cr4();
-+}
-+
- #ifdef CONFIG_PARAVIRT
- #include <asm/paravirt.h>
- #else
-@@ -173,11 +178,6 @@ static inline void write_cr3(unsigned long x)
-       native_write_cr3(x);
- }
- 
--static inline unsigned long __read_cr4(void)
--{
--      return native_read_cr4();
--}
--
- static inline void __write_cr4(unsigned long x)
- {
-       native_write_cr4(x);
-diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
-index a14df9eecfed..19a3e8f961c7 100644
---- a/arch/x86/kernel/paravirt.c
-+++ b/arch/x86/kernel/paravirt.c
-@@ -327,7 +327,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
-       .set_debugreg = native_set_debugreg,
-       .read_cr0 = native_read_cr0,
-       .write_cr0 = native_write_cr0,
--      .read_cr4 = native_read_cr4,
-       .write_cr4 = native_write_cr4,
- #ifdef CONFIG_X86_64
-       .read_cr8 = native_read_cr8,
-@@ -343,7 +342,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
-       .set_ldt = native_set_ldt,
-       .load_gdt = native_load_gdt,
-       .load_idt = native_load_idt,
--      .store_idt = native_store_idt,
-       .store_tr = native_store_tr,
-       .load_tls = native_load_tls,
- #ifdef CONFIG_X86_64
-@@ -411,8 +409,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
-       .set_pte = native_set_pte,
-       .set_pte_at = native_set_pte_at,
-       .set_pmd = native_set_pmd,
--      .set_pmd_at = native_set_pmd_at,
--      .pte_update = paravirt_nop,
- 
-       .ptep_modify_prot_start = __ptep_modify_prot_start,
-       .ptep_modify_prot_commit = __ptep_modify_prot_commit,
-@@ -424,7 +420,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
-       .pmd_clear = native_pmd_clear,
- #endif
-       .set_pud = native_set_pud,
--      .set_pud_at = native_set_pud_at,
- 
-       .pmd_val = PTE_IDENT,
-       .make_pmd = PTE_IDENT,
-diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
-index 7b447d126d17..dd4996a96c71 100644
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -5174,7 +5174,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
-       vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
-       vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */
- 
--      native_store_idt(&dt);
-+      store_idt(&dt);
-       vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
-       vmx->host_idt_base = dt.address;
- 
-diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
-index 508a708eb9a6..942391b5b639 100644
---- a/arch/x86/mm/pgtable.c
-+++ b/arch/x86/mm/pgtable.c
-@@ -426,10 +426,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
- {
-       int changed = !pte_same(*ptep, entry);
- 
--      if (changed && dirty) {
-+      if (changed && dirty)
-               *ptep = entry;
--              pte_update(vma->vm_mm, address, ptep);
--      }
- 
-       return changed;
- }
-@@ -486,9 +484,6 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
-               ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
-                                        (unsigned long *) &ptep->pte);
- 
--      if (ret)
--              pte_update(vma->vm_mm, addr, ptep);
--
-       return ret;
- }
- 
-diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
-index ae2a2e2d6362..69b9deff7e5c 100644
---- a/arch/x86/xen/enlighten_pv.c
-+++ b/arch/x86/xen/enlighten_pv.c
-@@ -1038,7 +1038,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
-       .read_cr0 = xen_read_cr0,
-       .write_cr0 = xen_write_cr0,
- 
--      .read_cr4 = native_read_cr4,
-       .write_cr4 = xen_write_cr4,
- 
- #ifdef CONFIG_X86_64
-@@ -1073,7 +1072,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
-       .alloc_ldt = xen_alloc_ldt,
-       .free_ldt = xen_free_ldt,
- 
--      .store_idt = native_store_idt,
-       .store_tr = xen_store_tr,
- 
-       .write_ldt_entry = xen_write_ldt_entry,
-diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
-index cab28cf2cffb..5f61b7e2e6b2 100644
---- a/arch/x86/xen/mmu_pv.c
-+++ b/arch/x86/xen/mmu_pv.c
-@@ -2430,8 +2430,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
-       .flush_tlb_single = xen_flush_tlb_single,
-       .flush_tlb_others = xen_flush_tlb_others,
- 
--      .pte_update = paravirt_nop,
--
-       .pgd_alloc = xen_pgd_alloc,
-       .pgd_free = xen_pgd_free,
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0032-x86-xen-Get-rid-of-paravirt-op-adjust_exception_fram.patch b/patches/kernel/0032-x86-xen-Get-rid-of-paravirt-op-adjust_exception_fram.patch

new file mode 100644 (file)

index 0000000..c58c59c
--- /dev/null
+++ b/patches/kernel/0032-x86-xen-Get-rid-of-paravirt-op-adjust_exception_fram.patch
@@ -0,0 +1,436 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 31 Aug 2017 19:42:49 +0200
+Subject: [PATCH] x86/xen: Get rid of paravirt op adjust_exception_frame
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+When running as Xen pv-guest the exception frame on the stack contains
+%r11 and %rcx in addition to the other data pushed by the processor.
+
+Instead of having a paravirt op being called for each exception type
+prepend the Xen specific code to each exception entry. When running as
+Xen pv-guest just use the exception entry with prepended instructions,
+otherwise use the entry without the Xen specific code.
+
+[ tglx: Merged through tip to avoid ugly merge conflict ]
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: xen-devel@lists.xenproject.org
+Cc: boris.ostrovsky@oracle.com
+Cc: luto@amacapital.net
+Link: http://lkml.kernel.org/r/20170831174249.26853-1-jg@pfupf.net
+(backported from commit 5878d5d6fdef6447d73b0acc121ba445bef37f53)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 9a6fb927deb3ebbe831741ca82081714637181a7)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/paravirt.h       |  5 --
+ arch/x86/include/asm/paravirt_types.h |  3 --
+ arch/x86/include/asm/proto.h          |  3 ++
+ arch/x86/include/asm/traps.h          | 28 ++++++++--
+ arch/x86/xen/xen-ops.h                |  1 -
+ arch/x86/kernel/asm-offsets_64.c      |  1 -
+ arch/x86/kernel/paravirt.c            |  3 --
+ arch/x86/xen/enlighten_pv.c           | 98 +++++++++++++++++++++++------------
+ arch/x86/xen/irq.c                    |  3 --
+ arch/x86/entry/entry_64.S             | 23 ++------
+ arch/x86/entry/entry_64_compat.S      |  1 -
+ arch/x86/xen/xen-asm_64.S             | 41 +++++++++++++--
+ 12 files changed, 133 insertions(+), 77 deletions(-)
+
+diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
+index 9ccac1926587..c25dd22f7c70 100644
+--- a/arch/x86/include/asm/paravirt.h
++++ b/arch/x86/include/asm/paravirt.h
+@@ -960,11 +960,6 @@ extern void default_banner(void);
+ #define GET_CR2_INTO_RAX                              \
+       call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
+ 
+-#define PARAVIRT_ADJUST_EXCEPTION_FRAME                                       \
+-      PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
+-                CLBR_NONE,                                            \
+-                call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
+-
+ #define USERGS_SYSRET64                                                       \
+       PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
+                 CLBR_NONE,                                            \
+diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
+index 9ffc36bfe4cd..6b64fc6367f2 100644
+--- a/arch/x86/include/asm/paravirt_types.h
++++ b/arch/x86/include/asm/paravirt_types.h
+@@ -196,9 +196,6 @@ struct pv_irq_ops {
+       void (*safe_halt)(void);
+       void (*halt)(void);
+ 
+-#ifdef CONFIG_X86_64
+-      void (*adjust_exception_frame)(void);
+-#endif
+ } __no_randomize_layout;
+ 
+ struct pv_mmu_ops {
+diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
+index 8d3964fc5f91..b408b1886195 100644
+--- a/arch/x86/include/asm/proto.h
++++ b/arch/x86/include/asm/proto.h
+@@ -24,6 +24,9 @@ void entry_SYSENTER_compat(void);
+ void __end_entry_SYSENTER_compat(void);
+ void entry_SYSCALL_compat(void);
+ void entry_INT80_compat(void);
++#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
++void xen_entry_INT80_compat(void);
++#endif
+ #endif
+ 
+ void x86_configure_nx(void);
+diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
+index b4f322d6c95f..feb89dbe359d 100644
+--- a/arch/x86/include/asm/traps.h
++++ b/arch/x86/include/asm/traps.h
+@@ -13,9 +13,6 @@ asmlinkage void divide_error(void);
+ asmlinkage void debug(void);
+ asmlinkage void nmi(void);
+ asmlinkage void int3(void);
+-asmlinkage void xen_debug(void);
+-asmlinkage void xen_int3(void);
+-asmlinkage void xen_stack_segment(void);
+ asmlinkage void overflow(void);
+ asmlinkage void bounds(void);
+ asmlinkage void invalid_op(void);
+@@ -56,6 +53,31 @@ asmlinkage void simd_coprocessor_error(void);
+ #define trace_page_fault page_fault
+ #endif
+ 
++#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
++asmlinkage void xen_divide_error(void);
++asmlinkage void xen_xendebug(void);
++asmlinkage void xen_xenint3(void);
++asmlinkage void xen_nmi(void);
++asmlinkage void xen_overflow(void);
++asmlinkage void xen_bounds(void);
++asmlinkage void xen_invalid_op(void);
++asmlinkage void xen_device_not_available(void);
++asmlinkage void xen_double_fault(void);
++asmlinkage void xen_coprocessor_segment_overrun(void);
++asmlinkage void xen_invalid_TSS(void);
++asmlinkage void xen_segment_not_present(void);
++asmlinkage void xen_stack_segment(void);
++asmlinkage void xen_general_protection(void);
++asmlinkage void xen_page_fault(void);
++asmlinkage void xen_spurious_interrupt_bug(void);
++asmlinkage void xen_coprocessor_error(void);
++asmlinkage void xen_alignment_check(void);
++#ifdef CONFIG_X86_MCE
++asmlinkage void xen_machine_check(void);
++#endif /* CONFIG_X86_MCE */
++asmlinkage void xen_simd_coprocessor_error(void);
++#endif
++
+ dotraplinkage void do_divide_error(struct pt_regs *, long);
+ dotraplinkage void do_debug(struct pt_regs *, long);
+ dotraplinkage void do_nmi(struct pt_regs *, long);
+diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
+index 70301ac0d414..c8a6d224f7ed 100644
+--- a/arch/x86/xen/xen-ops.h
++++ b/arch/x86/xen/xen-ops.h
+@@ -138,7 +138,6 @@ __visible void xen_restore_fl_direct(unsigned long);
+ __visible void xen_iret(void);
+ __visible void xen_sysret32(void);
+ __visible void xen_sysret64(void);
+-__visible void xen_adjust_exception_frame(void);
+ 
+ extern int xen_panic_handler_init(void);
+ 
+diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
+index 99332f550c48..cf42206926af 100644
+--- a/arch/x86/kernel/asm-offsets_64.c
++++ b/arch/x86/kernel/asm-offsets_64.c
+@@ -20,7 +20,6 @@ static char syscalls_ia32[] = {
+ int main(void)
+ {
+ #ifdef CONFIG_PARAVIRT
+-      OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
+       OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
+       OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
+       BLANK();
+diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
+index bc0a849589bb..a14df9eecfed 100644
+--- a/arch/x86/kernel/paravirt.c
++++ b/arch/x86/kernel/paravirt.c
+@@ -319,9 +319,6 @@ __visible struct pv_irq_ops pv_irq_ops = {
+       .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
+       .safe_halt = native_safe_halt,
+       .halt = native_halt,
+-#ifdef CONFIG_X86_64
+-      .adjust_exception_frame = paravirt_nop,
+-#endif
+ };
+ 
+ __visible struct pv_cpu_ops pv_cpu_ops = {
+diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
+index c76f5ff4d0d7..ae2a2e2d6362 100644
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -586,6 +586,70 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
+       preempt_enable();
+ }
+ 
++#ifdef CONFIG_X86_64
++struct trap_array_entry {
++      void (*orig)(void);
++      void (*xen)(void);
++      bool ist_okay;
++};
++
++static struct trap_array_entry trap_array[] = {
++      { debug,                       xen_xendebug,                    true },
++      { int3,                        xen_xenint3,                     true },
++      { double_fault,                xen_double_fault,                true },
++#ifdef CONFIG_X86_MCE
++      { machine_check,               xen_machine_check,               true },
++#endif
++      { nmi,                         xen_nmi,                         true },
++      { overflow,                    xen_overflow,                    false },
++#ifdef CONFIG_IA32_EMULATION
++      { entry_INT80_compat,          xen_entry_INT80_compat,          false },
++#endif
++      { page_fault,                  xen_page_fault,                  false },
++      { divide_error,                xen_divide_error,                false },
++      { bounds,                      xen_bounds,                      false },
++      { invalid_op,                  xen_invalid_op,                  false },
++      { device_not_available,        xen_device_not_available,        false },
++      { coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false },
++      { invalid_TSS,                 xen_invalid_TSS,                 false },
++      { segment_not_present,         xen_segment_not_present,         false },
++      { stack_segment,               xen_stack_segment,               false },
++      { general_protection,          xen_general_protection,          false },
++      { spurious_interrupt_bug,      xen_spurious_interrupt_bug,      false },
++      { coprocessor_error,           xen_coprocessor_error,           false },
++      { alignment_check,             xen_alignment_check,             false },
++      { simd_coprocessor_error,      xen_simd_coprocessor_error,      false },
++};
++
++static bool get_trap_addr(void **addr, unsigned int ist)
++{
++      unsigned int nr;
++      bool ist_okay = false;
++
++      /*
++       * Replace trap handler addresses by Xen specific ones.
++       * Check for known traps using IST and whitelist them.
++       * The debugger ones are the only ones we care about.
++       * Xen will handle faults like double_fault, so we should never see
++       * them.  Warn if there's an unexpected IST-using fault handler.
++       */
++      for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) {
++              struct trap_array_entry *entry = trap_array + nr;
++
++              if (*addr == entry->orig) {
++                      *addr = entry->xen;
++                      ist_okay = entry->ist_okay;
++                      break;
++              }
++      }
++
++      if (WARN_ON(ist != 0 && !ist_okay))
++              return false;
++
++      return true;
++}
++#endif
++
+ static int cvt_gate_to_trap(int vector, const gate_desc *val,
+                           struct trap_info *info)
+ {
+@@ -598,40 +662,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
+ 
+       addr = gate_offset(val);
+ #ifdef CONFIG_X86_64
+-      /*
+-       * Look for known traps using IST, and substitute them
+-       * appropriately.  The debugger ones are the only ones we care
+-       * about.  Xen will handle faults like double_fault,
+-       * so we should never see them.  Warn if
+-       * there's an unexpected IST-using fault handler.
+-       */
+-      if (addr == (unsigned long)debug)
+-              addr = (unsigned long)xen_debug;
+-      else if (addr == (unsigned long)int3)
+-              addr = (unsigned long)xen_int3;
+-      else if (addr == (unsigned long)stack_segment)
+-              addr = (unsigned long)xen_stack_segment;
+-      else if (addr == (unsigned long)double_fault) {
+-              /* Don't need to handle these */
++      if (!get_trap_addr((void **)&addr, val->bits.ist))
+               return 0;
+-#ifdef CONFIG_X86_MCE
+-      } else if (addr == (unsigned long)machine_check) {
+-              /*
+-               * when xen hypervisor inject vMCE to guest,
+-               * use native mce handler to handle it
+-               */
+-              ;
+-#endif
+-      } else if (addr == (unsigned long)nmi)
+-              /*
+-               * Use the native version as well.
+-               */
+-              ;
+-      else {
+-              /* Some other trap using IST? */
+-              if (WARN_ON(val->bits.ist != 0))
+-                      return 0;
+-      }
+ #endif        /* CONFIG_X86_64 */
+       info->address = addr;
+ 
+diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
+index 33e92955e09d..d4eff5676cfa 100644
+--- a/arch/x86/xen/irq.c
++++ b/arch/x86/xen/irq.c
+@@ -123,9 +123,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
+ 
+       .safe_halt = xen_safe_halt,
+       .halt = xen_halt,
+-#ifdef CONFIG_X86_64
+-      .adjust_exception_frame = xen_adjust_exception_frame,
+-#endif
+ };
+ 
+ void __init xen_init_irq_ops(void)
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index dfabcbf8e813..c12260ef3e4b 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -829,7 +829,6 @@ ENTRY(\sym)
+       .endif
+ 
+       ASM_CLAC
+-      PARAVIRT_ADJUST_EXCEPTION_FRAME
+ 
+       .ifeq \has_error_code
+       pushq   $-1                             /* ORIG_RAX: no syscall to restart */
+@@ -975,7 +974,7 @@ ENTRY(do_softirq_own_stack)
+ ENDPROC(do_softirq_own_stack)
+ 
+ #ifdef CONFIG_XEN
+-idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
++idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
+ 
+ /*
+  * A note on the "critical region" in our callback handler.
+@@ -1042,8 +1041,6 @@ ENTRY(xen_failsafe_callback)
+       movq    8(%rsp), %r11
+       addq    $0x30, %rsp
+       pushq   $0                              /* RIP */
+-      pushq   %r11
+-      pushq   %rcx
+       UNWIND_HINT_IRET_REGS offset=8
+       jmp     general_protection
+ 1:    /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
+@@ -1074,9 +1071,8 @@ idtentry int3                    do_int3                 has_error_code=0        paranoid=1 shift_ist=DEBUG_STACK
+ idtentry stack_segment                do_stack_segment        has_error_code=1
+ 
+ #ifdef CONFIG_XEN
+-idtentry xen_debug            do_debug                has_error_code=0
+-idtentry xen_int3             do_int3                 has_error_code=0
+-idtentry xen_stack_segment    do_stack_segment        has_error_code=1
++idtentry xendebug             do_debug                has_error_code=0
++idtentry xenint3              do_int3                 has_error_code=0
+ #endif
+ 
+ idtentry general_protection   do_general_protection   has_error_code=1
+@@ -1240,20 +1236,9 @@ ENTRY(error_exit)
+ END(error_exit)
+ 
+ /* Runs on exception stack */
++/* XXX: broken on Xen PV */
+ ENTRY(nmi)
+       UNWIND_HINT_IRET_REGS
+-      /*
+-       * Fix up the exception frame if we're on Xen.
+-       * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
+-       * one value to the stack on native, so it may clobber the rdx
+-       * scratch slot, but it won't clobber any of the important
+-       * slots past it.
+-       *
+-       * Xen is a different story, because the Xen frame itself overlaps
+-       * the "NMI executing" variable.
+-       */
+-      PARAVIRT_ADJUST_EXCEPTION_FRAME
+-
+       /*
+        * We allow breakpoints in NMIs. If a breakpoint occurs, then
+        * the iretq it performs will take us out of NMI context.
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index 5314d7b8e5ad..d8468ba24be0 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -293,7 +293,6 @@ ENTRY(entry_INT80_compat)
+       /*
+        * Interrupts are off on entry.
+        */
+-      PARAVIRT_ADJUST_EXCEPTION_FRAME
+       ASM_CLAC                        /* Do this early to minimize exposure */
+       SWAPGS
+ 
+diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
+index 3a3b6a211584..dae2cc33afb5 100644
+--- a/arch/x86/xen/xen-asm_64.S
++++ b/arch/x86/xen/xen-asm_64.S
+@@ -16,11 +16,42 @@
+ 
+ #include <linux/linkage.h>
+ 
+-ENTRY(xen_adjust_exception_frame)
+-      mov 8+0(%rsp), %rcx
+-      mov 8+8(%rsp), %r11
+-      ret $16
+-ENDPROC(xen_adjust_exception_frame)
++.macro xen_pv_trap name
++ENTRY(xen_\name)
++      pop %rcx
++      pop %r11
++      jmp  \name
++END(xen_\name)
++.endm
++
++xen_pv_trap divide_error
++xen_pv_trap debug
++xen_pv_trap xendebug
++xen_pv_trap int3
++xen_pv_trap xenint3
++xen_pv_trap nmi
++xen_pv_trap overflow
++xen_pv_trap bounds
++xen_pv_trap invalid_op
++xen_pv_trap device_not_available
++xen_pv_trap double_fault
++xen_pv_trap coprocessor_segment_overrun
++xen_pv_trap invalid_TSS
++xen_pv_trap segment_not_present
++xen_pv_trap stack_segment
++xen_pv_trap general_protection
++xen_pv_trap page_fault
++xen_pv_trap spurious_interrupt_bug
++xen_pv_trap coprocessor_error
++xen_pv_trap alignment_check
++#ifdef CONFIG_X86_MCE
++xen_pv_trap machine_check
++#endif /* CONFIG_X86_MCE */
++xen_pv_trap simd_coprocessor_error
++#ifdef CONFIG_IA32_EMULATION
++xen_pv_trap entry_INT80_compat
++#endif
++xen_pv_trap hypervisor_callback
+ 
+ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
+ /*
+-- 
+2.14.2
+
diff --git a/patches/kernel/0033-x86-entry-Fix-idtentry-unwind-hint.patch b/patches/kernel/0033-x86-entry-Fix-idtentry-unwind-hint.patch

deleted file mode 100644 (file)

index 13fb2c6..0000000
--- a/patches/kernel/0033-x86-entry-Fix-idtentry-unwind-hint.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Fri, 20 Oct 2017 11:21:33 -0500
-Subject: [PATCH] x86/entry: Fix idtentry unwind hint
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This fixes the following ORC warning in the 'int3' entry code:
-
-  WARNING: can't dereference iret registers at ffff8801c5f17fe0 for ip ffffffff95f0d94b
-
-The ORC metadata had the wrong stack offset for the iret registers.
-
-Their location on the stack is dependent on whether the exception has an
-error code.
-
-Reported-and-tested-by: Andrei Vagin <avagin@virtuozzo.com>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Fixes: 8c1f75587a18 ("x86/entry/64: Add unwind hint annotations")
-Link: http://lkml.kernel.org/r/931d57f0551ed7979d5e7e05370d445c8e5137f8.1508516398.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 98990a33b77dda9babf91cb235654f6729e5702e)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 266be2a5053230f6d0b6f27d3e8e9f28df40dd7e)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index c12260ef3e4b..2e4fc6425f47 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -821,7 +821,7 @@ apicinterrupt IRQ_WORK_VECTOR                      irq_work_interrupt              smp_irq_work_interrupt
- 
- .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
- ENTRY(\sym)
--      UNWIND_HINT_IRET_REGS offset=8
-+      UNWIND_HINT_IRET_REGS offset=\has_error_code*8
- 
-       /* Sanity check */
-       .if \shift_ist != -1 && \paranoid == 0
--- 
-2.14.2
-
diff --git a/patches/kernel/0033-x86-paravirt-Remove-no-longer-used-paravirt-function.patch b/patches/kernel/0033-x86-paravirt-Remove-no-longer-used-paravirt-function.patch

new file mode 100644 (file)

index 0000000..516eb30
--- /dev/null
+++ b/patches/kernel/0033-x86-paravirt-Remove-no-longer-used-paravirt-function.patch
@@ -0,0 +1,390 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Mon, 4 Sep 2017 12:25:27 +0200
+Subject: [PATCH] x86/paravirt: Remove no longer used paravirt functions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+With removal of lguest some of the paravirt functions are no longer
+needed:
+
+       ->read_cr4()
+       ->store_idt()
+       ->set_pmd_at()
+       ->set_pud_at()
+       ->pte_update()
+
+Remove them.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: akataria@vmware.com
+Cc: boris.ostrovsky@oracle.com
+Cc: chrisw@sous-sol.org
+Cc: jeremy@goop.org
+Cc: rusty@rustcorp.com.au
+Cc: virtualization@lists.linux-foundation.org
+Cc: xen-devel@lists.xenproject.org
+Link: http://lkml.kernel.org/r/20170904102527.25409-1-jgross@suse.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 87930019c713873a1c3b9bd55dde46e81f70c8f1)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit edf3ab0080a6e79a300753e66929b0b7499eaec5)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/desc.h           |  3 +--
+ arch/x86/include/asm/paravirt.h       | 37 -----------------------------------
+ arch/x86/include/asm/paravirt_types.h |  9 ---------
+ arch/x86/include/asm/pgtable.h        | 27 ++++---------------------
+ arch/x86/include/asm/special_insns.h  | 10 +++++-----
+ arch/x86/kernel/paravirt.c            |  5 -----
+ arch/x86/kvm/vmx.c                    |  2 +-
+ arch/x86/mm/pgtable.c                 |  7 +------
+ arch/x86/xen/enlighten_pv.c           |  2 --
+ arch/x86/xen/mmu_pv.c                 |  2 --
+ 10 files changed, 12 insertions(+), 92 deletions(-)
+
+diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
+index 57e502a4e92f..f995e5a09136 100644
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -120,7 +120,6 @@ static inline int desc_empty(const void *ptr)
+ #define load_ldt(ldt)                         asm volatile("lldt %0"::"m" (ldt))
+ 
+ #define store_gdt(dtr)                                native_store_gdt(dtr)
+-#define store_idt(dtr)                                native_store_idt(dtr)
+ #define store_tr(tr)                          (tr = native_store_tr())
+ 
+ #define load_TLS(t, cpu)                      native_load_tls(t, cpu)
+@@ -241,7 +240,7 @@ static inline void native_store_gdt(struct desc_ptr *dtr)
+       asm volatile("sgdt %0":"=m" (*dtr));
+ }
+ 
+-static inline void native_store_idt(struct desc_ptr *dtr)
++static inline void store_idt(struct desc_ptr *dtr)
+ {
+       asm volatile("sidt %0":"=m" (*dtr));
+ }
+diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
+index c25dd22f7c70..12deec722cf0 100644
+--- a/arch/x86/include/asm/paravirt.h
++++ b/arch/x86/include/asm/paravirt.h
+@@ -71,11 +71,6 @@ static inline void write_cr3(unsigned long x)
+       PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
+ }
+ 
+-static inline unsigned long __read_cr4(void)
+-{
+-      return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
+-}
+-
+ static inline void __write_cr4(unsigned long x)
+ {
+       PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
+@@ -228,10 +223,6 @@ static inline void set_ldt(const void *addr, unsigned entries)
+ {
+       PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
+ }
+-static inline void store_idt(struct desc_ptr *dtr)
+-{
+-      PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
+-}
+ static inline unsigned long paravirt_store_tr(void)
+ {
+       return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
+@@ -365,12 +356,6 @@ static inline void paravirt_release_p4d(unsigned long pfn)
+       PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn);
+ }
+ 
+-static inline void pte_update(struct mm_struct *mm, unsigned long addr,
+-                            pte_t *ptep)
+-{
+-      PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
+-}
+-
+ static inline pte_t __pte(pteval_t val)
+ {
+       pteval_t ret;
+@@ -472,28 +457,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+               PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
+ }
+ 
+-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+-                            pmd_t *pmdp, pmd_t pmd)
+-{
+-      if (sizeof(pmdval_t) > sizeof(long))
+-              /* 5 arg words */
+-              pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd);
+-      else
+-              PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
+-                          native_pmd_val(pmd));
+-}
+-
+-static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+-                            pud_t *pudp, pud_t pud)
+-{
+-      if (sizeof(pudval_t) > sizeof(long))
+-              /* 5 arg words */
+-              pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
+-      else
+-              PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
+-                          native_pud_val(pud));
+-}
+-
+ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
+ {
+       pmdval_t val = native_pmd_val(pmd);
+diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
+index 6b64fc6367f2..42873edd9f9d 100644
+--- a/arch/x86/include/asm/paravirt_types.h
++++ b/arch/x86/include/asm/paravirt_types.h
+@@ -107,7 +107,6 @@ struct pv_cpu_ops {
+       unsigned long (*read_cr0)(void);
+       void (*write_cr0)(unsigned long);
+ 
+-      unsigned long (*read_cr4)(void);
+       void (*write_cr4)(unsigned long);
+ 
+ #ifdef CONFIG_X86_64
+@@ -119,8 +118,6 @@ struct pv_cpu_ops {
+       void (*load_tr_desc)(void);
+       void (*load_gdt)(const struct desc_ptr *);
+       void (*load_idt)(const struct desc_ptr *);
+-      /* store_gdt has been removed. */
+-      void (*store_idt)(struct desc_ptr *);
+       void (*set_ldt)(const void *desc, unsigned entries);
+       unsigned long (*store_tr)(void);
+       void (*load_tls)(struct thread_struct *t, unsigned int cpu);
+@@ -245,12 +242,6 @@ struct pv_mmu_ops {
+       void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
+                          pte_t *ptep, pte_t pteval);
+       void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
+-      void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
+-                         pmd_t *pmdp, pmd_t pmdval);
+-      void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
+-                         pud_t *pudp, pud_t pudval);
+-      void (*pte_update)(struct mm_struct *mm, unsigned long addr,
+-                         pte_t *ptep);
+ 
+       pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
+                                       pte_t *ptep);
+diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
+index 77037b6f1caa..bb8e9ea7deb4 100644
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -43,8 +43,6 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
+ #else  /* !CONFIG_PARAVIRT */
+ #define set_pte(ptep, pte)            native_set_pte(ptep, pte)
+ #define set_pte_at(mm, addr, ptep, pte)       native_set_pte_at(mm, addr, ptep, pte)
+-#define set_pmd_at(mm, addr, pmdp, pmd)       native_set_pmd_at(mm, addr, pmdp, pmd)
+-#define set_pud_at(mm, addr, pudp, pud)       native_set_pud_at(mm, addr, pudp, pud)
+ 
+ #define set_pte_atomic(ptep, pte)                                     \
+       native_set_pte_atomic(ptep, pte)
+@@ -75,8 +73,6 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
+ #define pte_clear(mm, addr, ptep)     native_pte_clear(mm, addr, ptep)
+ #define pmd_clear(pmd)                        native_pmd_clear(pmd)
+ 
+-#define pte_update(mm, addr, ptep)              do { } while (0)
+-
+ #define pgd_val(x)    native_pgd_val(x)
+ #define __pgd(x)      native_make_pgd(x)
+ 
+@@ -965,31 +961,18 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
+       native_set_pte(ptep, pte);
+ }
+ 
+-static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
+-                                   pmd_t *pmdp , pmd_t pmd)
++static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
++                            pmd_t *pmdp, pmd_t pmd)
+ {
+       native_set_pmd(pmdp, pmd);
+ }
+ 
+-static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
+-                                   pud_t *pudp, pud_t pud)
++static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
++                            pud_t *pudp, pud_t pud)
+ {
+       native_set_pud(pudp, pud);
+ }
+ 
+-#ifndef CONFIG_PARAVIRT
+-/*
+- * Rules for using pte_update - it must be called after any PTE update which
+- * has not been done using the set_pte / clear_pte interfaces.  It is used by
+- * shadow mode hypervisors to resynchronize the shadow page tables.  Kernel PTE
+- * updates should either be sets, clears, or set_pte_atomic for P->P
+- * transitions, which means this hook should only be called for user PTEs.
+- * This hook implies a P->P protection or access change has taken place, which
+- * requires a subsequent TLB flush.
+- */
+-#define pte_update(mm, addr, ptep)            do { } while (0)
+-#endif
+-
+ /*
+  * We only update the dirty/accessed state if we set
+  * the dirty bit by hand in the kernel, since the hardware
+@@ -1017,7 +1000,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+                                      pte_t *ptep)
+ {
+       pte_t pte = native_ptep_get_and_clear(ptep);
+-      pte_update(mm, addr, ptep);
+       return pte;
+ }
+ 
+@@ -1044,7 +1026,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
+                                     unsigned long addr, pte_t *ptep)
+ {
+       clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
+-      pte_update(mm, addr, ptep);
+ }
+ 
+ #define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
+diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
+index 9efaabf5b54b..a24dfcf79f4a 100644
+--- a/arch/x86/include/asm/special_insns.h
++++ b/arch/x86/include/asm/special_insns.h
+@@ -135,6 +135,11 @@ static inline void native_wbinvd(void)
+ 
+ extern asmlinkage void native_load_gs_index(unsigned);
+ 
++static inline unsigned long __read_cr4(void)
++{
++      return native_read_cr4();
++}
++
+ #ifdef CONFIG_PARAVIRT
+ #include <asm/paravirt.h>
+ #else
+@@ -173,11 +178,6 @@ static inline void write_cr3(unsigned long x)
+       native_write_cr3(x);
+ }
+ 
+-static inline unsigned long __read_cr4(void)
+-{
+-      return native_read_cr4();
+-}
+-
+ static inline void __write_cr4(unsigned long x)
+ {
+       native_write_cr4(x);
+diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
+index a14df9eecfed..19a3e8f961c7 100644
+--- a/arch/x86/kernel/paravirt.c
++++ b/arch/x86/kernel/paravirt.c
+@@ -327,7 +327,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
+       .set_debugreg = native_set_debugreg,
+       .read_cr0 = native_read_cr0,
+       .write_cr0 = native_write_cr0,
+-      .read_cr4 = native_read_cr4,
+       .write_cr4 = native_write_cr4,
+ #ifdef CONFIG_X86_64
+       .read_cr8 = native_read_cr8,
+@@ -343,7 +342,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
+       .set_ldt = native_set_ldt,
+       .load_gdt = native_load_gdt,
+       .load_idt = native_load_idt,
+-      .store_idt = native_store_idt,
+       .store_tr = native_store_tr,
+       .load_tls = native_load_tls,
+ #ifdef CONFIG_X86_64
+@@ -411,8 +409,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
+       .set_pte = native_set_pte,
+       .set_pte_at = native_set_pte_at,
+       .set_pmd = native_set_pmd,
+-      .set_pmd_at = native_set_pmd_at,
+-      .pte_update = paravirt_nop,
+ 
+       .ptep_modify_prot_start = __ptep_modify_prot_start,
+       .ptep_modify_prot_commit = __ptep_modify_prot_commit,
+@@ -424,7 +420,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
+       .pmd_clear = native_pmd_clear,
+ #endif
+       .set_pud = native_set_pud,
+-      .set_pud_at = native_set_pud_at,
+ 
+       .pmd_val = PTE_IDENT,
+       .make_pmd = PTE_IDENT,
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 7b447d126d17..dd4996a96c71 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -5174,7 +5174,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
+       vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
+       vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */
+ 
+-      native_store_idt(&dt);
++      store_idt(&dt);
+       vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
+       vmx->host_idt_base = dt.address;
+ 
+diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
+index 508a708eb9a6..942391b5b639 100644
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -426,10 +426,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
+ {
+       int changed = !pte_same(*ptep, entry);
+ 
+-      if (changed && dirty) {
++      if (changed && dirty)
+               *ptep = entry;
+-              pte_update(vma->vm_mm, address, ptep);
+-      }
+ 
+       return changed;
+ }
+@@ -486,9 +484,6 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
+               ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+                                        (unsigned long *) &ptep->pte);
+ 
+-      if (ret)
+-              pte_update(vma->vm_mm, addr, ptep);
+-
+       return ret;
+ }
+ 
+diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
+index ae2a2e2d6362..69b9deff7e5c 100644
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -1038,7 +1038,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
+       .read_cr0 = xen_read_cr0,
+       .write_cr0 = xen_write_cr0,
+ 
+-      .read_cr4 = native_read_cr4,
+       .write_cr4 = xen_write_cr4,
+ 
+ #ifdef CONFIG_X86_64
+@@ -1073,7 +1072,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
+       .alloc_ldt = xen_alloc_ldt,
+       .free_ldt = xen_free_ldt,
+ 
+-      .store_idt = native_store_idt,
+       .store_tr = xen_store_tr,
+ 
+       .write_ldt_entry = xen_write_ldt_entry,
+diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
+index cab28cf2cffb..5f61b7e2e6b2 100644
+--- a/arch/x86/xen/mmu_pv.c
++++ b/arch/x86/xen/mmu_pv.c
+@@ -2430,8 +2430,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
+       .flush_tlb_single = xen_flush_tlb_single,
+       .flush_tlb_others = xen_flush_tlb_others,
+ 
+-      .pte_update = paravirt_nop,
+-
+       .pgd_alloc = xen_pgd_alloc,
+       .pgd_free = xen_pgd_free,
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0034-x86-entry-Fix-idtentry-unwind-hint.patch b/patches/kernel/0034-x86-entry-Fix-idtentry-unwind-hint.patch

new file mode 100644 (file)

index 0000000..13fb2c6
--- /dev/null
+++ b/patches/kernel/0034-x86-entry-Fix-idtentry-unwind-hint.patch
@@ -0,0 +1,53 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Fri, 20 Oct 2017 11:21:33 -0500
+Subject: [PATCH] x86/entry: Fix idtentry unwind hint
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This fixes the following ORC warning in the 'int3' entry code:
+
+  WARNING: can't dereference iret registers at ffff8801c5f17fe0 for ip ffffffff95f0d94b
+
+The ORC metadata had the wrong stack offset for the iret registers.
+
+Their location on the stack is dependent on whether the exception has an
+error code.
+
+Reported-and-tested-by: Andrei Vagin <avagin@virtuozzo.com>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Fixes: 8c1f75587a18 ("x86/entry/64: Add unwind hint annotations")
+Link: http://lkml.kernel.org/r/931d57f0551ed7979d5e7e05370d445c8e5137f8.1508516398.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 98990a33b77dda9babf91cb235654f6729e5702e)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 266be2a5053230f6d0b6f27d3e8e9f28df40dd7e)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index c12260ef3e4b..2e4fc6425f47 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -821,7 +821,7 @@ apicinterrupt IRQ_WORK_VECTOR                      irq_work_interrupt              smp_irq_work_interrupt
+ 
+ .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
+ ENTRY(\sym)
+-      UNWIND_HINT_IRET_REGS offset=8
++      UNWIND_HINT_IRET_REGS offset=\has_error_code*8
+ 
+       /* Sanity check */
+       .if \shift_ist != -1 && \paranoid == 0
+-- 
+2.14.2
+
diff --git a/patches/kernel/0034-x86-mm-64-Initialize-CR4.PCIDE-early.patch b/patches/kernel/0034-x86-mm-64-Initialize-CR4.PCIDE-early.patch

deleted file mode 100644 (file)

index 15f8a3e..0000000
--- a/patches/kernel/0034-x86-mm-64-Initialize-CR4.PCIDE-early.patch
+++ /dev/null
@@ -1,237 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sun, 10 Sep 2017 17:48:27 -0700
-Subject: [PATCH] x86/mm/64: Initialize CR4.PCIDE early
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-cpu_init() is weird: it's called rather late (after early
-identification and after most MMU state is initialized) on the boot
-CPU but is called extremely early (before identification) on secondary
-CPUs.  It's called just late enough on the boot CPU that its CR4 value
-isn't propagated to mmu_cr4_features.
-
-Even if we put CR4.PCIDE into mmu_cr4_features, we'd hit two
-problems.  First, we'd crash in the trampoline code.  That's
-fixable, and I tried that.  It turns out that mmu_cr4_features is
-totally ignored by secondary_start_64(), though, so even with the
-trampoline code fixed, it wouldn't help.
-
-This means that we don't currently have CR4.PCIDE reliably initialized
-before we start playing with cpu_tlbstate.  This is very fragile and
-tends to cause boot failures if I make even small changes to the TLB
-handling code.
-
-Make it more robust: initialize CR4.PCIDE earlier on the boot CPU
-and propagate it to secondary CPUs in start_secondary().
-
-( Yes, this is ugly.  I think we should have improved mmu_cr4_features
-  to actually control CR4 during secondary bootup, but that would be
-  fairly intrusive at this stage. )
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reported-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
-Tested-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-kernel@vger.kernel.org
-Fixes: 660da7c9228f ("x86/mm: Enable CR4.PCIDE on supported systems")
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit c7ad5ad297e644601747d6dbee978bf85e14f7bc)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 0e6a37a43aa876327e7d21881c09977da2d5c270)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/cpu/common.c | 49 +++++++-------------------------------------
- arch/x86/kernel/setup.c      |  5 ++++-
- arch/x86/kernel/smpboot.c    |  8 +++++---
- arch/x86/mm/init.c           | 34 ++++++++++++++++++++++++++++++
- 4 files changed, 50 insertions(+), 46 deletions(-)
-
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 0b80ed14ff52..4be7b209a3d6 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -169,21 +169,21 @@ static int __init x86_mpx_setup(char *s)
- __setup("nompx", x86_mpx_setup);
- 
- #ifdef CONFIG_X86_64
--static int __init x86_pcid_setup(char *s)
-+static int __init x86_nopcid_setup(char *s)
- {
--      /* require an exact match without trailing characters */
--      if (strlen(s))
--              return 0;
-+      /* nopcid doesn't accept parameters */
-+      if (s)
-+              return -EINVAL;
- 
-       /* do not emit a message if the feature is not present */
-       if (!boot_cpu_has(X86_FEATURE_PCID))
--              return 1;
-+              return 0;
- 
-       setup_clear_cpu_cap(X86_FEATURE_PCID);
-       pr_info("nopcid: PCID feature disabled\n");
--      return 1;
-+      return 0;
- }
--__setup("nopcid", x86_pcid_setup);
-+early_param("nopcid", x86_nopcid_setup);
- #endif
- 
- static int __init x86_noinvpcid_setup(char *s)
-@@ -329,38 +329,6 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
-       }
- }
- 
--static void setup_pcid(struct cpuinfo_x86 *c)
--{
--      if (cpu_has(c, X86_FEATURE_PCID)) {
--              if (cpu_has(c, X86_FEATURE_PGE)) {
--                      /*
--                       * We'd like to use cr4_set_bits_and_update_boot(),
--                       * but we can't.  CR4.PCIDE is special and can only
--                       * be set in long mode, and the early CPU init code
--                       * doesn't know this and would try to restore CR4.PCIDE
--                       * prior to entering long mode.
--                       *
--                       * Instead, we rely on the fact that hotplug, resume,
--                       * etc all fully restore CR4 before they write anything
--                       * that could have nonzero PCID bits to CR3.  CR4.PCIDE
--                       * has no effect on the page tables themselves, so we
--                       * don't need it to be restored early.
--                       */
--                      cr4_set_bits(X86_CR4_PCIDE);
--              } else {
--                      /*
--                       * flush_tlb_all(), as currently implemented, won't
--                       * work if PCID is on but PGE is not.  Since that
--                       * combination doesn't exist on real hardware, there's
--                       * no reason to try to fully support it, but it's
--                       * polite to avoid corrupting data if we're on
--                       * an improperly configured VM.
--                       */
--                      clear_cpu_cap(c, X86_FEATURE_PCID);
--              }
--      }
--}
--
- /*
-  * Protection Keys are not available in 32-bit mode.
-  */
-@@ -1175,9 +1143,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
-       setup_smep(c);
-       setup_smap(c);
- 
--      /* Set up PCID */
--      setup_pcid(c);
--
-       /*
-        * The vendor-specific functions might have changed features.
-        * Now we do "generic changes."
-diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
-index d7e8b983aa72..f964bfddfefd 100644
---- a/arch/x86/kernel/setup.c
-+++ b/arch/x86/kernel/setup.c
-@@ -1174,8 +1174,11 @@ void __init setup_arch(char **cmdline_p)
-        * with the current CR4 value.  This may not be necessary, but
-        * auditing all the early-boot CR4 manipulation would be needed to
-        * rule it out.
-+       *
-+       * Mask off features that don't work outside long mode (just
-+       * PCIDE for now).
-        */
--      mmu_cr4_features = __read_cr4();
-+      mmu_cr4_features = __read_cr4() & ~X86_CR4_PCIDE;
- 
-       memblock_set_current_limit(get_max_mapped());
- 
-diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
-index 893fd8c849e2..d05006f6c31c 100644
---- a/arch/x86/kernel/smpboot.c
-+++ b/arch/x86/kernel/smpboot.c
-@@ -227,10 +227,12 @@ static int enable_start_cpu0;
- static void notrace start_secondary(void *unused)
- {
-       /*
--       * Don't put *anything* before cpu_init(), SMP booting is too
--       * fragile that we want to limit the things done here to the
--       * most necessary things.
-+       * Don't put *anything* except direct CPU state initialization
-+       * before cpu_init(), SMP booting is too fragile that we want to
-+       * limit the things done here to the most necessary things.
-        */
-+      if (boot_cpu_has(X86_FEATURE_PCID))
-+              __write_cr4(__read_cr4() | X86_CR4_PCIDE);
-       cpu_init();
-       x86_cpuinit.early_percpu_clock_init();
-       preempt_disable();
-diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
-index bf3f1065d6ad..df2624b091a7 100644
---- a/arch/x86/mm/init.c
-+++ b/arch/x86/mm/init.c
-@@ -19,6 +19,7 @@
- #include <asm/microcode.h>
- #include <asm/kaslr.h>
- #include <asm/hypervisor.h>
-+#include <asm/cpufeature.h>
- 
- /*
-  * We need to define the tracepoints somewhere, and tlb.c
-@@ -193,6 +194,38 @@ static void __init probe_page_size_mask(void)
-       }
- }
- 
-+static void setup_pcid(void)
-+{
-+#ifdef CONFIG_X86_64
-+      if (boot_cpu_has(X86_FEATURE_PCID)) {
-+              if (boot_cpu_has(X86_FEATURE_PGE)) {
-+                      /*
-+                       * This can't be cr4_set_bits_and_update_boot() --
-+                       * the trampoline code can't handle CR4.PCIDE and
-+                       * it wouldn't do any good anyway.  Despite the name,
-+                       * cr4_set_bits_and_update_boot() doesn't actually
-+                       * cause the bits in question to remain set all the
-+                       * way through the secondary boot asm.
-+                       *
-+                       * Instead, we brute-force it and set CR4.PCIDE
-+                       * manually in start_secondary().
-+                       */
-+                      cr4_set_bits(X86_CR4_PCIDE);
-+              } else {
-+                      /*
-+                       * flush_tlb_all(), as currently implemented, won't
-+                       * work if PCID is on but PGE is not.  Since that
-+                       * combination doesn't exist on real hardware, there's
-+                       * no reason to try to fully support it, but it's
-+                       * polite to avoid corrupting data if we're on
-+                       * an improperly configured VM.
-+                       */
-+                      setup_clear_cpu_cap(X86_FEATURE_PCID);
-+              }
-+      }
-+#endif
-+}
-+
- #ifdef CONFIG_X86_32
- #define NR_RANGE_MR 3
- #else /* CONFIG_X86_64 */
-@@ -592,6 +625,7 @@ void __init init_mem_mapping(void)
-       unsigned long end;
- 
-       probe_page_size_mask();
-+      setup_pcid();
- 
- #ifdef CONFIG_X86_64
-       end = max_pfn << PAGE_SHIFT;
--- 
-2.14.2
-
diff --git a/patches/kernel/0035-objtool-Add-ORC-unwind-table-generation.patch b/patches/kernel/0035-objtool-Add-ORC-unwind-table-generation.patch

deleted file mode 100644 (file)

index f4bce26..0000000
--- a/patches/kernel/0035-objtool-Add-ORC-unwind-table-generation.patch
+++ /dev/null
@@ -1,1339 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Tue, 11 Jul 2017 10:33:42 -0500
-Subject: [PATCH] objtool: Add ORC unwind table generation
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Now that objtool knows the states of all registers on the stack for each
-instruction, it's straightforward to generate debuginfo for an unwinder
-to use.
-
-Instead of generating DWARF, generate a new format called ORC, which is
-more suitable for an in-kernel unwinder.  See
-Documentation/x86/orc-unwinder.txt for a more detailed description of
-this new debuginfo format and why it's preferable to DWARF.
-
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mike Galbraith <efault@gmx.de>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: live-patching@vger.kernel.org
-Link: http://lkml.kernel.org/r/c9b9f01ba6c5ed2bdc9bb0957b78167fdbf9632e.1499786555.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 627fce14809ba5610b0cb476cd0186d3fcedecfc)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 9460f7766786ad0f8330f78f22b81842632a5398)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- tools/objtool/Documentation/stack-validation.txt |  56 ++----
- tools/objtool/builtin.h                          |   1 +
- tools/objtool/check.h                            |  15 +-
- tools/objtool/elf.h                              |  15 +-
- tools/objtool/orc.h                              |  30 ++++
- tools/objtool/orc_types.h                        |  85 +++++++++
- tools/objtool/builtin-check.c                    |   2 +-
- tools/objtool/builtin-orc.c                      |  70 ++++++++
- tools/objtool/check.c                            |  58 +++++-
- tools/objtool/elf.c                              | 212 ++++++++++++++++++++--
- tools/objtool/objtool.c                          |   3 +-
- tools/objtool/orc_dump.c                         | 212 ++++++++++++++++++++++
- tools/objtool/orc_gen.c                          | 214 +++++++++++++++++++++++
- tools/objtool/Build                              |   3 +
- 14 files changed, 916 insertions(+), 60 deletions(-)
- create mode 100644 tools/objtool/orc.h
- create mode 100644 tools/objtool/orc_types.h
- create mode 100644 tools/objtool/builtin-orc.c
- create mode 100644 tools/objtool/orc_dump.c
- create mode 100644 tools/objtool/orc_gen.c
-
-diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
-index 17c1195f11f4..6a1af43862df 100644
---- a/tools/objtool/Documentation/stack-validation.txt
-+++ b/tools/objtool/Documentation/stack-validation.txt
-@@ -11,9 +11,6 @@ analyzes every .o file and ensures the validity of its stack metadata.
- It enforces a set of rules on asm code and C inline assembly code so
- that stack traces can be reliable.
- 
--Currently it only checks frame pointer usage, but there are plans to add
--CFI validation for C files and CFI generation for asm files.
--
- For each function, it recursively follows all possible code paths and
- validates the correct frame pointer state at each instruction.
- 
-@@ -23,6 +20,10 @@ alternative execution paths to a given instruction (or set of
- instructions).  Similarly, it knows how to follow switch statements, for
- which gcc sometimes uses jump tables.
- 
-+(Objtool also has an 'orc generate' subcommand which generates debuginfo
-+for the ORC unwinder.  See Documentation/x86/orc-unwinder.txt in the
-+kernel tree for more details.)
-+
- 
- Why do we need stack metadata validation?
- -----------------------------------------
-@@ -93,37 +94,14 @@ a) More reliable stack traces for frame pointer enabled kernels
-        or at the very end of the function after the stack frame has been
-        destroyed.  This is an inherent limitation of frame pointers.
- 
--b) 100% reliable stack traces for DWARF enabled kernels
--
--   (NOTE: This is not yet implemented)
--
--   As an alternative to frame pointers, DWARF Call Frame Information
--   (CFI) metadata can be used to walk the stack.  Unlike frame pointers,
--   CFI metadata is out of band.  So it doesn't affect runtime
--   performance and it can be reliable even when interrupts or exceptions
--   are involved.
--
--   For C code, gcc automatically generates DWARF CFI metadata.  But for
--   asm code, generating CFI is a tedious manual approach which requires
--   manually placed .cfi assembler macros to be scattered throughout the
--   code.  It's clumsy and very easy to get wrong, and it makes the real
--   code harder to read.
--
--   Stacktool will improve this situation in several ways.  For code
--   which already has CFI annotations, it will validate them.  For code
--   which doesn't have CFI annotations, it will generate them.  So an
--   architecture can opt to strip out all the manual .cfi annotations
--   from their asm code and have objtool generate them instead.
-+b) ORC (Oops Rewind Capability) unwind table generation
- 
--   We might also add a runtime stack validation debug option where we
--   periodically walk the stack from schedule() and/or an NMI to ensure
--   that the stack metadata is sane and that we reach the bottom of the
--   stack.
-+   An alternative to frame pointers and DWARF, ORC unwind data can be
-+   used to walk the stack.  Unlike frame pointers, ORC data is out of
-+   band.  So it doesn't affect runtime performance and it can be
-+   reliable even when interrupts or exceptions are involved.
- 
--   So the benefit of objtool here will be that external tooling should
--   always show perfect stack traces.  And the same will be true for
--   kernel warning/oops traces if the architecture has a runtime DWARF
--   unwinder.
-+   For more details, see Documentation/x86/orc-unwinder.txt.
- 
- c) Higher live patching compatibility rate
- 
-@@ -211,7 +189,7 @@ they mean, and suggestions for how to fix them.
-    function, add proper frame pointer logic using the FRAME_BEGIN and
-    FRAME_END macros.  Otherwise, if it's not a callable function, remove
-    its ELF function annotation by changing ENDPROC to END, and instead
--   use the manual CFI hint macros in asm/undwarf.h.
-+   use the manual unwind hint macros in asm/unwind_hints.h.
- 
-    If it's a GCC-compiled .c file, the error may be because the function
-    uses an inline asm() statement which has a "call" instruction.  An
-@@ -231,8 +209,8 @@ they mean, and suggestions for how to fix them.
-    If the error is for an asm file, and the instruction is inside (or
-    reachable from) a callable function, the function should be annotated
-    with the ENTRY/ENDPROC macros (ENDPROC is the important one).
--   Otherwise, the code should probably be annotated with the CFI hint
--   macros in asm/undwarf.h so objtool and the unwinder can know the
-+   Otherwise, the code should probably be annotated with the unwind hint
-+   macros in asm/unwind_hints.h so objtool and the unwinder can know the
-    stack state associated with the code.
- 
-    If you're 100% sure the code won't affect stack traces, or if you're
-@@ -258,7 +236,7 @@ they mean, and suggestions for how to fix them.
-    instructions aren't allowed in a callable function, and are most
-    likely part of the kernel entry code.  They should usually not have
-    the callable function annotation (ENDPROC) and should always be
--   annotated with the CFI hint macros in asm/undwarf.h.
-+   annotated with the unwind hint macros in asm/unwind_hints.h.
- 
- 
- 6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame
-@@ -272,7 +250,7 @@ they mean, and suggestions for how to fix them.
- 
-    If the instruction is not actually in a callable function (e.g.
-    kernel entry code), change ENDPROC to END and annotate manually with
--   the CFI hint macros in asm/undwarf.h.
-+   the unwind hint macros in asm/unwind_hints.h.
- 
- 
- 7. file: warning: objtool: func()+0x5c: stack state mismatch
-@@ -288,8 +266,8 @@ they mean, and suggestions for how to fix them.
- 
-    Another possibility is that the code has some asm or inline asm which
-    does some unusual things to the stack or the frame pointer.  In such
--   cases it's probably appropriate to use the CFI hint macros in
--   asm/undwarf.h.
-+   cases it's probably appropriate to use the unwind hint macros in
-+   asm/unwind_hints.h.
- 
- 
- 8. file.o: warning: objtool: funcA() falls through to next function funcB()
-diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
-index 34d2ba78a616..dd526067fed5 100644
---- a/tools/objtool/builtin.h
-+++ b/tools/objtool/builtin.h
-@@ -18,5 +18,6 @@
- #define _BUILTIN_H
- 
- extern int cmd_check(int argc, const char **argv);
-+extern int cmd_orc(int argc, const char **argv);
- 
- #endif /* _BUILTIN_H */
-diff --git a/tools/objtool/check.h b/tools/objtool/check.h
-index da85f5b00ec6..046874bbe226 100644
---- a/tools/objtool/check.h
-+++ b/tools/objtool/check.h
-@@ -22,12 +22,14 @@
- #include "elf.h"
- #include "cfi.h"
- #include "arch.h"
-+#include "orc.h"
- #include <linux/hashtable.h>
- 
- struct insn_state {
-       struct cfi_reg cfa;
-       struct cfi_reg regs[CFI_NUM_REGS];
-       int stack_size;
-+      unsigned char type;
-       bool bp_scratch;
-       bool drap;
-       int drap_reg;
-@@ -48,6 +50,7 @@ struct instruction {
-       struct symbol *func;
-       struct stack_op stack_op;
-       struct insn_state state;
-+      struct orc_entry orc;
- };
- 
- struct objtool_file {
-@@ -58,9 +61,19 @@ struct objtool_file {
-       bool ignore_unreachables, c_file;
- };
- 
--int check(const char *objname, bool nofp);
-+int check(const char *objname, bool nofp, bool orc);
-+
-+struct instruction *find_insn(struct objtool_file *file,
-+                            struct section *sec, unsigned long offset);
- 
- #define for_each_insn(file, insn)                                     \
-       list_for_each_entry(insn, &file->insn_list, list)
- 
-+#define sec_for_each_insn(file, sec, insn)                            \
-+      for (insn = find_insn(file, sec, 0);                            \
-+           insn && &insn->list != &file->insn_list &&                 \
-+                      insn->sec == sec;                               \
-+           insn = list_next_entry(insn, list))
-+
-+
- #endif /* _CHECK_H */
-diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
-index 343968b778cb..d86e2ff14466 100644
---- a/tools/objtool/elf.h
-+++ b/tools/objtool/elf.h
-@@ -28,6 +28,13 @@
- # define elf_getshdrstrndx elf_getshstrndx
- #endif
- 
-+/*
-+ * Fallback for systems without this "read, mmaping if possible" cmd.
-+ */
-+#ifndef ELF_C_READ_MMAP
-+#define ELF_C_READ_MMAP ELF_C_READ
-+#endif
-+
- struct section {
-       struct list_head list;
-       GElf_Shdr sh;
-@@ -41,6 +48,7 @@ struct section {
-       char *name;
-       int idx;
-       unsigned int len;
-+      bool changed, text;
- };
- 
- struct symbol {
-@@ -75,7 +83,7 @@ struct elf {
- };
- 
- 
--struct elf *elf_open(const char *name);
-+struct elf *elf_open(const char *name, int flags);
- struct section *find_section_by_name(struct elf *elf, const char *name);
- struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
- struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
-@@ -83,6 +91,11 @@ struct rela *find_rela_by_dest(struct section *sec, unsigned long offset);
- struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
-                                    unsigned int len);
- struct symbol *find_containing_func(struct section *sec, unsigned long offset);
-+struct section *elf_create_section(struct elf *elf, const char *name, size_t
-+                                 entsize, int nr);
-+struct section *elf_create_rela_section(struct elf *elf, struct section *base);
-+int elf_rebuild_rela_section(struct section *sec);
-+int elf_write(struct elf *elf);
- void elf_close(struct elf *elf);
- 
- #define for_each_sec(file, sec)                                               \
-diff --git a/tools/objtool/orc.h b/tools/objtool/orc.h
-new file mode 100644
-index 000000000000..a4139e386ef3
---- /dev/null
-+++ b/tools/objtool/orc.h
-@@ -0,0 +1,30 @@
-+/*
-+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License
-+ * as published by the Free Software Foundation; either version 2
-+ * of the License, or (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
-+ */
-+
-+#ifndef _ORC_H
-+#define _ORC_H
-+
-+#include "orc_types.h"
-+
-+struct objtool_file;
-+
-+int create_orc(struct objtool_file *file);
-+int create_orc_sections(struct objtool_file *file);
-+
-+int orc_dump(const char *objname);
-+
-+#endif /* _ORC_H */
-diff --git a/tools/objtool/orc_types.h b/tools/objtool/orc_types.h
-new file mode 100644
-index 000000000000..fc5cf6cffd9a
---- /dev/null
-+++ b/tools/objtool/orc_types.h
-@@ -0,0 +1,85 @@
-+/*
-+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License
-+ * as published by the Free Software Foundation; either version 2
-+ * of the License, or (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
-+ */
-+
-+#ifndef _ORC_TYPES_H
-+#define _ORC_TYPES_H
-+
-+#include <linux/types.h>
-+#include <linux/compiler.h>
-+
-+/*
-+ * The ORC_REG_* registers are base registers which are used to find other
-+ * registers on the stack.
-+ *
-+ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
-+ * address of the previous frame: the caller's SP before it called the current
-+ * function.
-+ *
-+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
-+ * the current frame.
-+ *
-+ * The most commonly used base registers are SP and BP -- which the previous SP
-+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is
-+ * usually based on.
-+ *
-+ * The rest of the base registers are needed for special cases like entry code
-+ * and GCC realigned stacks.
-+ */
-+#define ORC_REG_UNDEFINED             0
-+#define ORC_REG_PREV_SP                       1
-+#define ORC_REG_DX                    2
-+#define ORC_REG_DI                    3
-+#define ORC_REG_BP                    4
-+#define ORC_REG_SP                    5
-+#define ORC_REG_R10                   6
-+#define ORC_REG_R13                   7
-+#define ORC_REG_BP_INDIRECT           8
-+#define ORC_REG_SP_INDIRECT           9
-+#define ORC_REG_MAX                   15
-+
-+/*
-+ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
-+ * caller's SP right before it made the call).  Used for all callable
-+ * functions, i.e. all C code and all callable asm functions.
-+ *
-+ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
-+ * to a fully populated pt_regs from a syscall, interrupt, or exception.
-+ *
-+ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
-+ * points to the iret return frame.
-+ */
-+#define ORC_TYPE_CALL                 0
-+#define ORC_TYPE_REGS                 1
-+#define ORC_TYPE_REGS_IRET            2
-+
-+/*
-+ * This struct is more or less a vastly simplified version of the DWARF Call
-+ * Frame Information standard.  It contains only the necessary parts of DWARF
-+ * CFI, simplified for ease of access by the in-kernel unwinder.  It tells the
-+ * unwinder how to find the previous SP and BP (and sometimes entry regs) on
-+ * the stack for a given code address.  Each instance of the struct corresponds
-+ * to one or more code locations.
-+ */
-+struct orc_entry {
-+      s16             sp_offset;
-+      s16             bp_offset;
-+      unsigned        sp_reg:4;
-+      unsigned        bp_reg:4;
-+      unsigned        type:2;
-+} __packed;
-+
-+#endif /* _ORC_TYPES_H */
-diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
-index 365c34ecab26..eedf089b1495 100644
---- a/tools/objtool/builtin-check.c
-+++ b/tools/objtool/builtin-check.c
-@@ -52,5 +52,5 @@ int cmd_check(int argc, const char **argv)
- 
-       objname = argv[0];
- 
--      return check(objname, nofp);
-+      return check(objname, nofp, false);
- }
-diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
-new file mode 100644
-index 000000000000..5ca41ab0df48
---- /dev/null
-+++ b/tools/objtool/builtin-orc.c
-@@ -0,0 +1,70 @@
-+/*
-+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License
-+ * as published by the Free Software Foundation; either version 2
-+ * of the License, or (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
-+ */
-+
-+/*
-+ * objtool orc:
-+ *
-+ * This command analyzes a .o file and adds .orc_unwind and .orc_unwind_ip
-+ * sections to it, which is used by the in-kernel ORC unwinder.
-+ *
-+ * This command is a superset of "objtool check".
-+ */
-+
-+#include <string.h>
-+#include <subcmd/parse-options.h>
-+#include "builtin.h"
-+#include "check.h"
-+
-+
-+static const char *orc_usage[] = {
-+      "objtool orc generate [<options>] file.o",
-+      "objtool orc dump file.o",
-+      NULL,
-+};
-+
-+extern const struct option check_options[];
-+extern bool nofp;
-+
-+int cmd_orc(int argc, const char **argv)
-+{
-+      const char *objname;
-+
-+      argc--; argv++;
-+      if (!strncmp(argv[0], "gen", 3)) {
-+              argc = parse_options(argc, argv, check_options, orc_usage, 0);
-+              if (argc != 1)
-+                      usage_with_options(orc_usage, check_options);
-+
-+              objname = argv[0];
-+
-+              return check(objname, nofp, true);
-+
-+      }
-+
-+      if (!strcmp(argv[0], "dump")) {
-+              if (argc != 2)
-+                      usage_with_options(orc_usage, check_options);
-+
-+              objname = argv[1];
-+
-+              return orc_dump(objname);
-+      }
-+
-+      usage_with_options(orc_usage, check_options);
-+
-+      return 0;
-+}
-diff --git a/tools/objtool/check.c b/tools/objtool/check.c
-index 2c6d74880403..cb57c526ba17 100644
---- a/tools/objtool/check.c
-+++ b/tools/objtool/check.c
-@@ -36,8 +36,8 @@ const char *objname;
- static bool nofp;
- struct cfi_state initial_func_cfi;
- 
--static struct instruction *find_insn(struct objtool_file *file,
--                                   struct section *sec, unsigned long offset)
-+struct instruction *find_insn(struct objtool_file *file,
-+                            struct section *sec, unsigned long offset)
- {
-       struct instruction *insn;
- 
-@@ -259,6 +259,11 @@ static int decode_instructions(struct objtool_file *file)
-               if (!(sec->sh.sh_flags & SHF_EXECINSTR))
-                       continue;
- 
-+              if (strcmp(sec->name, ".altinstr_replacement") &&
-+                  strcmp(sec->name, ".altinstr_aux") &&
-+                  strncmp(sec->name, ".discard.", 9))
-+                      sec->text = true;
-+
-               for (offset = 0; offset < sec->len; offset += insn->len) {
-                       insn = malloc(sizeof(*insn));
-                       if (!insn) {
-@@ -947,6 +952,30 @@ static bool has_valid_stack_frame(struct insn_state *state)
-       return false;
- }
- 
-+static int update_insn_state_regs(struct instruction *insn, struct insn_state *state)
-+{
-+      struct cfi_reg *cfa = &state->cfa;
-+      struct stack_op *op = &insn->stack_op;
-+
-+      if (cfa->base != CFI_SP)
-+              return 0;
-+
-+      /* push */
-+      if (op->dest.type == OP_DEST_PUSH)
-+              cfa->offset += 8;
-+
-+      /* pop */
-+      if (op->src.type == OP_SRC_POP)
-+              cfa->offset -= 8;
-+
-+      /* add immediate to sp */
-+      if (op->dest.type == OP_DEST_REG && op->src.type == OP_SRC_ADD &&
-+          op->dest.reg == CFI_SP && op->src.reg == CFI_SP)
-+              cfa->offset -= op->src.offset;
-+
-+      return 0;
-+}
-+
- static void save_reg(struct insn_state *state, unsigned char reg, int base,
-                    int offset)
- {
-@@ -1032,6 +1061,9 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
-               return 0;
-       }
- 
-+      if (state->type == ORC_TYPE_REGS || state->type == ORC_TYPE_REGS_IRET)
-+              return update_insn_state_regs(insn, state);
-+
-       switch (op->dest.type) {
- 
-       case OP_DEST_REG:
-@@ -1323,6 +1355,10 @@ static bool insn_state_match(struct instruction *insn, struct insn_state *state)
-                       break;
-               }
- 
-+      } else if (state1->type != state2->type) {
-+              WARN_FUNC("stack state mismatch: type1=%d type2=%d",
-+                        insn->sec, insn->offset, state1->type, state2->type);
-+
-       } else if (state1->drap != state2->drap ||
-                (state1->drap && state1->drap_reg != state2->drap_reg)) {
-               WARN_FUNC("stack state mismatch: drap1=%d(%d) drap2=%d(%d)",
-@@ -1613,7 +1649,7 @@ static void cleanup(struct objtool_file *file)
-       elf_close(file->elf);
- }
- 
--int check(const char *_objname, bool _nofp)
-+int check(const char *_objname, bool _nofp, bool orc)
- {
-       struct objtool_file file;
-       int ret, warnings = 0;
-@@ -1621,7 +1657,7 @@ int check(const char *_objname, bool _nofp)
-       objname = _objname;
-       nofp = _nofp;
- 
--      file.elf = elf_open(objname);
-+      file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY);
-       if (!file.elf)
-               return 1;
- 
-@@ -1654,6 +1690,20 @@ int check(const char *_objname, bool _nofp)
-               warnings += ret;
-       }
- 
-+      if (orc) {
-+              ret = create_orc(&file);
-+              if (ret < 0)
-+                      goto out;
-+
-+              ret = create_orc_sections(&file);
-+              if (ret < 0)
-+                      goto out;
-+
-+              ret = elf_write(file.elf);
-+              if (ret < 0)
-+                      goto out;
-+      }
-+
- out:
-       cleanup(&file);
- 
-diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
-index 1a7e8aa2af58..6e9f980a7d26 100644
---- a/tools/objtool/elf.c
-+++ b/tools/objtool/elf.c
-@@ -30,16 +30,6 @@
- #include "elf.h"
- #include "warn.h"
- 
--/*
-- * Fallback for systems without this "read, mmaping if possible" cmd.
-- */
--#ifndef ELF_C_READ_MMAP
--#define ELF_C_READ_MMAP ELF_C_READ
--#endif
--
--#define WARN_ELF(format, ...)                                 \
--      WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1))
--
- struct section *find_section_by_name(struct elf *elf, const char *name)
- {
-       struct section *sec;
-@@ -349,9 +339,10 @@ static int read_relas(struct elf *elf)
-       return 0;
- }
- 
--struct elf *elf_open(const char *name)
-+struct elf *elf_open(const char *name, int flags)
- {
-       struct elf *elf;
-+      Elf_Cmd cmd;
- 
-       elf_version(EV_CURRENT);
- 
-@@ -364,13 +355,20 @@ struct elf *elf_open(const char *name)
- 
-       INIT_LIST_HEAD(&elf->sections);
- 
--      elf->fd = open(name, O_RDONLY);
-+      elf->fd = open(name, flags);
-       if (elf->fd == -1) {
-               perror("open");
-               goto err;
-       }
- 
--      elf->elf = elf_begin(elf->fd, ELF_C_READ_MMAP, NULL);
-+      if ((flags & O_ACCMODE) == O_RDONLY)
-+              cmd = ELF_C_READ_MMAP;
-+      else if ((flags & O_ACCMODE) == O_RDWR)
-+              cmd = ELF_C_RDWR;
-+      else /* O_WRONLY */
-+              cmd = ELF_C_WRITE;
-+
-+      elf->elf = elf_begin(elf->fd, cmd, NULL);
-       if (!elf->elf) {
-               WARN_ELF("elf_begin");
-               goto err;
-@@ -397,6 +395,194 @@ struct elf *elf_open(const char *name)
-       return NULL;
- }
- 
-+struct section *elf_create_section(struct elf *elf, const char *name,
-+                                 size_t entsize, int nr)
-+{
-+      struct section *sec, *shstrtab;
-+      size_t size = entsize * nr;
-+      struct Elf_Scn *s;
-+      Elf_Data *data;
-+
-+      sec = malloc(sizeof(*sec));
-+      if (!sec) {
-+              perror("malloc");
-+              return NULL;
-+      }
-+      memset(sec, 0, sizeof(*sec));
-+
-+      INIT_LIST_HEAD(&sec->symbol_list);
-+      INIT_LIST_HEAD(&sec->rela_list);
-+      hash_init(sec->rela_hash);
-+      hash_init(sec->symbol_hash);
-+
-+      list_add_tail(&sec->list, &elf->sections);
-+
-+      s = elf_newscn(elf->elf);
-+      if (!s) {
-+              WARN_ELF("elf_newscn");
-+              return NULL;
-+      }
-+
-+      sec->name = strdup(name);
-+      if (!sec->name) {
-+              perror("strdup");
-+              return NULL;
-+      }
-+
-+      sec->idx = elf_ndxscn(s);
-+      sec->len = size;
-+      sec->changed = true;
-+
-+      sec->data = elf_newdata(s);
-+      if (!sec->data) {
-+              WARN_ELF("elf_newdata");
-+              return NULL;
-+      }
-+
-+      sec->data->d_size = size;
-+      sec->data->d_align = 1;
-+
-+      if (size) {
-+              sec->data->d_buf = malloc(size);
-+              if (!sec->data->d_buf) {
-+                      perror("malloc");
-+                      return NULL;
-+              }
-+              memset(sec->data->d_buf, 0, size);
-+      }
-+
-+      if (!gelf_getshdr(s, &sec->sh)) {
-+              WARN_ELF("gelf_getshdr");
-+              return NULL;
-+      }
-+
-+      sec->sh.sh_size = size;
-+      sec->sh.sh_entsize = entsize;
-+      sec->sh.sh_type = SHT_PROGBITS;
-+      sec->sh.sh_addralign = 1;
-+      sec->sh.sh_flags = SHF_ALLOC;
-+
-+
-+      /* Add section name to .shstrtab */
-+      shstrtab = find_section_by_name(elf, ".shstrtab");
-+      if (!shstrtab) {
-+              WARN("can't find .shstrtab section");
-+              return NULL;
-+      }
-+
-+      s = elf_getscn(elf->elf, shstrtab->idx);
-+      if (!s) {
-+              WARN_ELF("elf_getscn");
-+              return NULL;
-+      }
-+
-+      data = elf_newdata(s);
-+      if (!data) {
-+              WARN_ELF("elf_newdata");
-+              return NULL;
-+      }
-+
-+      data->d_buf = sec->name;
-+      data->d_size = strlen(name) + 1;
-+      data->d_align = 1;
-+
-+      sec->sh.sh_name = shstrtab->len;
-+
-+      shstrtab->len += strlen(name) + 1;
-+      shstrtab->changed = true;
-+
-+      return sec;
-+}
-+
-+struct section *elf_create_rela_section(struct elf *elf, struct section *base)
-+{
-+      char *relaname;
-+      struct section *sec;
-+
-+      relaname = malloc(strlen(base->name) + strlen(".rela") + 1);
-+      if (!relaname) {
-+              perror("malloc");
-+              return NULL;
-+      }
-+      strcpy(relaname, ".rela");
-+      strcat(relaname, base->name);
-+
-+      sec = elf_create_section(elf, relaname, sizeof(GElf_Rela), 0);
-+      if (!sec)
-+              return NULL;
-+
-+      base->rela = sec;
-+      sec->base = base;
-+
-+      sec->sh.sh_type = SHT_RELA;
-+      sec->sh.sh_addralign = 8;
-+      sec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
-+      sec->sh.sh_info = base->idx;
-+      sec->sh.sh_flags = SHF_INFO_LINK;
-+
-+      return sec;
-+}
-+
-+int elf_rebuild_rela_section(struct section *sec)
-+{
-+      struct rela *rela;
-+      int nr, idx = 0, size;
-+      GElf_Rela *relas;
-+
-+      nr = 0;
-+      list_for_each_entry(rela, &sec->rela_list, list)
-+              nr++;
-+
-+      size = nr * sizeof(*relas);
-+      relas = malloc(size);
-+      if (!relas) {
-+              perror("malloc");
-+              return -1;
-+      }
-+
-+      sec->data->d_buf = relas;
-+      sec->data->d_size = size;
-+
-+      sec->sh.sh_size = size;
-+
-+      idx = 0;
-+      list_for_each_entry(rela, &sec->rela_list, list) {
-+              relas[idx].r_offset = rela->offset;
-+              relas[idx].r_addend = rela->addend;
-+              relas[idx].r_info = GELF_R_INFO(rela->sym->idx, rela->type);
-+              idx++;
-+      }
-+
-+      return 0;
-+}
-+
-+int elf_write(struct elf *elf)
-+{
-+      struct section *sec;
-+      Elf_Scn *s;
-+
-+      list_for_each_entry(sec, &elf->sections, list) {
-+              if (sec->changed) {
-+                      s = elf_getscn(elf->elf, sec->idx);
-+                      if (!s) {
-+                              WARN_ELF("elf_getscn");
-+                              return -1;
-+                      }
-+                      if (!gelf_update_shdr (s, &sec->sh)) {
-+                              WARN_ELF("gelf_update_shdr");
-+                              return -1;
-+                      }
-+              }
-+      }
-+
-+      if (elf_update(elf->elf, ELF_C_WRITE) < 0) {
-+              WARN_ELF("elf_update");
-+              return -1;
-+      }
-+
-+      return 0;
-+}
-+
- void elf_close(struct elf *elf)
- {
-       struct section *sec, *tmpsec;
-diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
-index ecc5b1b5d15d..31e0f9143840 100644
---- a/tools/objtool/objtool.c
-+++ b/tools/objtool/objtool.c
-@@ -42,10 +42,11 @@ struct cmd_struct {
- };
- 
- static const char objtool_usage_string[] =
--      "objtool [OPTIONS] COMMAND [ARGS]";
-+      "objtool COMMAND [ARGS]";
- 
- static struct cmd_struct objtool_cmds[] = {
-       {"check",       cmd_check,      "Perform stack metadata validation on an object file" },
-+      {"orc",         cmd_orc,        "Generate in-place ORC unwind tables for an object file" },
- };
- 
- bool help;
-diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
-new file mode 100644
-index 000000000000..36c5bf6a2675
---- /dev/null
-+++ b/tools/objtool/orc_dump.c
-@@ -0,0 +1,212 @@
-+/*
-+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License
-+ * as published by the Free Software Foundation; either version 2
-+ * of the License, or (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
-+ */
-+
-+#include <unistd.h>
-+#include "orc.h"
-+#include "warn.h"
-+
-+static const char *reg_name(unsigned int reg)
-+{
-+      switch (reg) {
-+      case ORC_REG_PREV_SP:
-+              return "prevsp";
-+      case ORC_REG_DX:
-+              return "dx";
-+      case ORC_REG_DI:
-+              return "di";
-+      case ORC_REG_BP:
-+              return "bp";
-+      case ORC_REG_SP:
-+              return "sp";
-+      case ORC_REG_R10:
-+              return "r10";
-+      case ORC_REG_R13:
-+              return "r13";
-+      case ORC_REG_BP_INDIRECT:
-+              return "bp(ind)";
-+      case ORC_REG_SP_INDIRECT:
-+              return "sp(ind)";
-+      default:
-+              return "?";
-+      }
-+}
-+
-+static const char *orc_type_name(unsigned int type)
-+{
-+      switch (type) {
-+      case ORC_TYPE_CALL:
-+              return "call";
-+      case ORC_TYPE_REGS:
-+              return "regs";
-+      case ORC_TYPE_REGS_IRET:
-+              return "iret";
-+      default:
-+              return "?";
-+      }
-+}
-+
-+static void print_reg(unsigned int reg, int offset)
-+{
-+      if (reg == ORC_REG_BP_INDIRECT)
-+              printf("(bp%+d)", offset);
-+      else if (reg == ORC_REG_SP_INDIRECT)
-+              printf("(sp%+d)", offset);
-+      else if (reg == ORC_REG_UNDEFINED)
-+              printf("(und)");
-+      else
-+              printf("%s%+d", reg_name(reg), offset);
-+}
-+
-+int orc_dump(const char *_objname)
-+{
-+      int fd, nr_entries, i, *orc_ip = NULL, orc_size = 0;
-+      struct orc_entry *orc = NULL;
-+      char *name;
-+      unsigned long nr_sections, orc_ip_addr = 0;
-+      size_t shstrtab_idx;
-+      Elf *elf;
-+      Elf_Scn *scn;
-+      GElf_Shdr sh;
-+      GElf_Rela rela;
-+      GElf_Sym sym;
-+      Elf_Data *data, *symtab = NULL, *rela_orc_ip = NULL;
-+
-+
-+      objname = _objname;
-+
-+      elf_version(EV_CURRENT);
-+
-+      fd = open(objname, O_RDONLY);
-+      if (fd == -1) {
-+              perror("open");
-+              return -1;
-+      }
-+
-+      elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
-+      if (!elf) {
-+              WARN_ELF("elf_begin");
-+              return -1;
-+      }
-+
-+      if (elf_getshdrnum(elf, &nr_sections)) {
-+              WARN_ELF("elf_getshdrnum");
-+              return -1;
-+      }
-+
-+      if (elf_getshdrstrndx(elf, &shstrtab_idx)) {
-+              WARN_ELF("elf_getshdrstrndx");
-+              return -1;
-+      }
-+
-+      for (i = 0; i < nr_sections; i++) {
-+              scn = elf_getscn(elf, i);
-+              if (!scn) {
-+                      WARN_ELF("elf_getscn");
-+                      return -1;
-+              }
-+
-+              if (!gelf_getshdr(scn, &sh)) {
-+                      WARN_ELF("gelf_getshdr");
-+                      return -1;
-+              }
-+
-+              name = elf_strptr(elf, shstrtab_idx, sh.sh_name);
-+              if (!name) {
-+                      WARN_ELF("elf_strptr");
-+                      return -1;
-+              }
-+
-+              data = elf_getdata(scn, NULL);
-+              if (!data) {
-+                      WARN_ELF("elf_getdata");
-+                      return -1;
-+              }
-+
-+              if (!strcmp(name, ".symtab")) {
-+                      symtab = data;
-+              } else if (!strcmp(name, ".orc_unwind")) {
-+                      orc = data->d_buf;
-+                      orc_size = sh.sh_size;
-+              } else if (!strcmp(name, ".orc_unwind_ip")) {
-+                      orc_ip = data->d_buf;
-+                      orc_ip_addr = sh.sh_addr;
-+              } else if (!strcmp(name, ".rela.orc_unwind_ip")) {
-+                      rela_orc_ip = data;
-+              }
-+      }
-+
-+      if (!symtab || !orc || !orc_ip)
-+              return 0;
-+
-+      if (orc_size % sizeof(*orc) != 0) {
-+              WARN("bad .orc_unwind section size");
-+              return -1;
-+      }
-+
-+      nr_entries = orc_size / sizeof(*orc);
-+      for (i = 0; i < nr_entries; i++) {
-+              if (rela_orc_ip) {
-+                      if (!gelf_getrela(rela_orc_ip, i, &rela)) {
-+                              WARN_ELF("gelf_getrela");
-+                              return -1;
-+                      }
-+
-+                      if (!gelf_getsym(symtab, GELF_R_SYM(rela.r_info), &sym)) {
-+                              WARN_ELF("gelf_getsym");
-+                              return -1;
-+                      }
-+
-+                      scn = elf_getscn(elf, sym.st_shndx);
-+                      if (!scn) {
-+                              WARN_ELF("elf_getscn");
-+                              return -1;
-+                      }
-+
-+                      if (!gelf_getshdr(scn, &sh)) {
-+                              WARN_ELF("gelf_getshdr");
-+                              return -1;
-+                      }
-+
-+                      name = elf_strptr(elf, shstrtab_idx, sh.sh_name);
-+                      if (!name || !*name) {
-+                              WARN_ELF("elf_strptr");
-+                              return -1;
-+                      }
-+
-+                      printf("%s+%lx:", name, rela.r_addend);
-+
-+              } else {
-+                      printf("%lx:", orc_ip_addr + (i * sizeof(int)) + orc_ip[i]);
-+              }
-+
-+
-+              printf(" sp:");
-+
-+              print_reg(orc[i].sp_reg, orc[i].sp_offset);
-+
-+              printf(" bp:");
-+
-+              print_reg(orc[i].bp_reg, orc[i].bp_offset);
-+
-+              printf(" type:%s\n", orc_type_name(orc[i].type));
-+      }
-+
-+      elf_end(elf);
-+      close(fd);
-+
-+      return 0;
-+}
-diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
-new file mode 100644
-index 000000000000..e5ca31429c9b
---- /dev/null
-+++ b/tools/objtool/orc_gen.c
-@@ -0,0 +1,214 @@
-+/*
-+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License
-+ * as published by the Free Software Foundation; either version 2
-+ * of the License, or (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
-+ */
-+
-+#include <stdlib.h>
-+#include <string.h>
-+
-+#include "orc.h"
-+#include "check.h"
-+#include "warn.h"
-+
-+int create_orc(struct objtool_file *file)
-+{
-+      struct instruction *insn;
-+
-+      for_each_insn(file, insn) {
-+              struct orc_entry *orc = &insn->orc;
-+              struct cfi_reg *cfa = &insn->state.cfa;
-+              struct cfi_reg *bp = &insn->state.regs[CFI_BP];
-+
-+              if (cfa->base == CFI_UNDEFINED) {
-+                      orc->sp_reg = ORC_REG_UNDEFINED;
-+                      continue;
-+              }
-+
-+              switch (cfa->base) {
-+              case CFI_SP:
-+                      orc->sp_reg = ORC_REG_SP;
-+                      break;
-+              case CFI_SP_INDIRECT:
-+                      orc->sp_reg = ORC_REG_SP_INDIRECT;
-+                      break;
-+              case CFI_BP:
-+                      orc->sp_reg = ORC_REG_BP;
-+                      break;
-+              case CFI_BP_INDIRECT:
-+                      orc->sp_reg = ORC_REG_BP_INDIRECT;
-+                      break;
-+              case CFI_R10:
-+                      orc->sp_reg = ORC_REG_R10;
-+                      break;
-+              case CFI_R13:
-+                      orc->sp_reg = ORC_REG_R13;
-+                      break;
-+              case CFI_DI:
-+                      orc->sp_reg = ORC_REG_DI;
-+                      break;
-+              case CFI_DX:
-+                      orc->sp_reg = ORC_REG_DX;
-+                      break;
-+              default:
-+                      WARN_FUNC("unknown CFA base reg %d",
-+                                insn->sec, insn->offset, cfa->base);
-+                      return -1;
-+              }
-+
-+              switch(bp->base) {
-+              case CFI_UNDEFINED:
-+                      orc->bp_reg = ORC_REG_UNDEFINED;
-+                      break;
-+              case CFI_CFA:
-+                      orc->bp_reg = ORC_REG_PREV_SP;
-+                      break;
-+              case CFI_BP:
-+                      orc->bp_reg = ORC_REG_BP;
-+                      break;
-+              default:
-+                      WARN_FUNC("unknown BP base reg %d",
-+                                insn->sec, insn->offset, bp->base);
-+                      return -1;
-+              }
-+
-+              orc->sp_offset = cfa->offset;
-+              orc->bp_offset = bp->offset;
-+              orc->type = insn->state.type;
-+      }
-+
-+      return 0;
-+}
-+
-+static int create_orc_entry(struct section *u_sec, struct section *ip_relasec,
-+                              unsigned int idx, struct section *insn_sec,
-+                              unsigned long insn_off, struct orc_entry *o)
-+{
-+      struct orc_entry *orc;
-+      struct rela *rela;
-+
-+      /* populate ORC data */
-+      orc = (struct orc_entry *)u_sec->data->d_buf + idx;
-+      memcpy(orc, o, sizeof(*orc));
-+
-+      /* populate rela for ip */
-+      rela = malloc(sizeof(*rela));
-+      if (!rela) {
-+              perror("malloc");
-+              return -1;
-+      }
-+      memset(rela, 0, sizeof(*rela));
-+
-+      rela->sym = insn_sec->sym;
-+      rela->addend = insn_off;
-+      rela->type = R_X86_64_PC32;
-+      rela->offset = idx * sizeof(int);
-+
-+      list_add_tail(&rela->list, &ip_relasec->rela_list);
-+      hash_add(ip_relasec->rela_hash, &rela->hash, rela->offset);
-+
-+      return 0;
-+}
-+
-+int create_orc_sections(struct objtool_file *file)
-+{
-+      struct instruction *insn, *prev_insn;
-+      struct section *sec, *u_sec, *ip_relasec;
-+      unsigned int idx;
-+
-+      struct orc_entry empty = {
-+              .sp_reg = ORC_REG_UNDEFINED,
-+              .bp_reg  = ORC_REG_UNDEFINED,
-+              .type    = ORC_TYPE_CALL,
-+      };
-+
-+      sec = find_section_by_name(file->elf, ".orc_unwind");
-+      if (sec) {
-+              WARN("file already has .orc_unwind section, skipping");
-+              return -1;
-+      }
-+
-+      /* count the number of needed orcs */
-+      idx = 0;
-+      for_each_sec(file, sec) {
-+              if (!sec->text)
-+                      continue;
-+
-+              prev_insn = NULL;
-+              sec_for_each_insn(file, sec, insn) {
-+                      if (!prev_insn ||
-+                          memcmp(&insn->orc, &prev_insn->orc,
-+                                 sizeof(struct orc_entry))) {
-+                              idx++;
-+                      }
-+                      prev_insn = insn;
-+              }
-+
-+              /* section terminator */
-+              if (prev_insn)
-+                      idx++;
-+      }
-+      if (!idx)
-+              return -1;
-+
-+
-+      /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
-+      sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
-+
-+      ip_relasec = elf_create_rela_section(file->elf, sec);
-+      if (!ip_relasec)
-+              return -1;
-+
-+      /* create .orc_unwind section */
-+      u_sec = elf_create_section(file->elf, ".orc_unwind",
-+                                 sizeof(struct orc_entry), idx);
-+
-+      /* populate sections */
-+      idx = 0;
-+      for_each_sec(file, sec) {
-+              if (!sec->text)
-+                      continue;
-+
-+              prev_insn = NULL;
-+              sec_for_each_insn(file, sec, insn) {
-+                      if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc,
-+                                               sizeof(struct orc_entry))) {
-+
-+                              if (create_orc_entry(u_sec, ip_relasec, idx,
-+                                                   insn->sec, insn->offset,
-+                                                   &insn->orc))
-+                                      return -1;
-+
-+                              idx++;
-+                      }
-+                      prev_insn = insn;
-+              }
-+
-+              /* section terminator */
-+              if (prev_insn) {
-+                      if (create_orc_entry(u_sec, ip_relasec, idx,
-+                                           prev_insn->sec,
-+                                           prev_insn->offset + prev_insn->len,
-+                                           &empty))
-+                              return -1;
-+
-+                      idx++;
-+              }
-+      }
-+
-+      if (elf_rebuild_rela_section(ip_relasec))
-+              return -1;
-+
-+      return 0;
-+}
-diff --git a/tools/objtool/Build b/tools/objtool/Build
-index 6f2e1987c4d9..749becdf5b90 100644
---- a/tools/objtool/Build
-+++ b/tools/objtool/Build
-@@ -1,6 +1,9 @@
- objtool-y += arch/$(SRCARCH)/
- objtool-y += builtin-check.o
-+objtool-y += builtin-orc.o
- objtool-y += check.o
-+objtool-y += orc_gen.o
-+objtool-y += orc_dump.o
- objtool-y += elf.o
- objtool-y += special.o
- objtool-y += objtool.o
--- 
-2.14.2
-
diff --git a/patches/kernel/0035-x86-mm-64-Initialize-CR4.PCIDE-early.patch b/patches/kernel/0035-x86-mm-64-Initialize-CR4.PCIDE-early.patch

new file mode 100644 (file)

index 0000000..15f8a3e
--- /dev/null
+++ b/patches/kernel/0035-x86-mm-64-Initialize-CR4.PCIDE-early.patch
@@ -0,0 +1,237 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 10 Sep 2017 17:48:27 -0700
+Subject: [PATCH] x86/mm/64: Initialize CR4.PCIDE early
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+cpu_init() is weird: it's called rather late (after early
+identification and after most MMU state is initialized) on the boot
+CPU but is called extremely early (before identification) on secondary
+CPUs.  It's called just late enough on the boot CPU that its CR4 value
+isn't propagated to mmu_cr4_features.
+
+Even if we put CR4.PCIDE into mmu_cr4_features, we'd hit two
+problems.  First, we'd crash in the trampoline code.  That's
+fixable, and I tried that.  It turns out that mmu_cr4_features is
+totally ignored by secondary_start_64(), though, so even with the
+trampoline code fixed, it wouldn't help.
+
+This means that we don't currently have CR4.PCIDE reliably initialized
+before we start playing with cpu_tlbstate.  This is very fragile and
+tends to cause boot failures if I make even small changes to the TLB
+handling code.
+
+Make it more robust: initialize CR4.PCIDE earlier on the boot CPU
+and propagate it to secondary CPUs in start_secondary().
+
+( Yes, this is ugly.  I think we should have improved mmu_cr4_features
+  to actually control CR4 during secondary bootup, but that would be
+  fairly intrusive at this stage. )
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reported-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
+Tested-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-kernel@vger.kernel.org
+Fixes: 660da7c9228f ("x86/mm: Enable CR4.PCIDE on supported systems")
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit c7ad5ad297e644601747d6dbee978bf85e14f7bc)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 0e6a37a43aa876327e7d21881c09977da2d5c270)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/cpu/common.c | 49 +++++++-------------------------------------
+ arch/x86/kernel/setup.c      |  5 ++++-
+ arch/x86/kernel/smpboot.c    |  8 +++++---
+ arch/x86/mm/init.c           | 34 ++++++++++++++++++++++++++++++
+ 4 files changed, 50 insertions(+), 46 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 0b80ed14ff52..4be7b209a3d6 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -169,21 +169,21 @@ static int __init x86_mpx_setup(char *s)
+ __setup("nompx", x86_mpx_setup);
+ 
+ #ifdef CONFIG_X86_64
+-static int __init x86_pcid_setup(char *s)
++static int __init x86_nopcid_setup(char *s)
+ {
+-      /* require an exact match without trailing characters */
+-      if (strlen(s))
+-              return 0;
++      /* nopcid doesn't accept parameters */
++      if (s)
++              return -EINVAL;
+ 
+       /* do not emit a message if the feature is not present */
+       if (!boot_cpu_has(X86_FEATURE_PCID))
+-              return 1;
++              return 0;
+ 
+       setup_clear_cpu_cap(X86_FEATURE_PCID);
+       pr_info("nopcid: PCID feature disabled\n");
+-      return 1;
++      return 0;
+ }
+-__setup("nopcid", x86_pcid_setup);
++early_param("nopcid", x86_nopcid_setup);
+ #endif
+ 
+ static int __init x86_noinvpcid_setup(char *s)
+@@ -329,38 +329,6 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
+       }
+ }
+ 
+-static void setup_pcid(struct cpuinfo_x86 *c)
+-{
+-      if (cpu_has(c, X86_FEATURE_PCID)) {
+-              if (cpu_has(c, X86_FEATURE_PGE)) {
+-                      /*
+-                       * We'd like to use cr4_set_bits_and_update_boot(),
+-                       * but we can't.  CR4.PCIDE is special and can only
+-                       * be set in long mode, and the early CPU init code
+-                       * doesn't know this and would try to restore CR4.PCIDE
+-                       * prior to entering long mode.
+-                       *
+-                       * Instead, we rely on the fact that hotplug, resume,
+-                       * etc all fully restore CR4 before they write anything
+-                       * that could have nonzero PCID bits to CR3.  CR4.PCIDE
+-                       * has no effect on the page tables themselves, so we
+-                       * don't need it to be restored early.
+-                       */
+-                      cr4_set_bits(X86_CR4_PCIDE);
+-              } else {
+-                      /*
+-                       * flush_tlb_all(), as currently implemented, won't
+-                       * work if PCID is on but PGE is not.  Since that
+-                       * combination doesn't exist on real hardware, there's
+-                       * no reason to try to fully support it, but it's
+-                       * polite to avoid corrupting data if we're on
+-                       * an improperly configured VM.
+-                       */
+-                      clear_cpu_cap(c, X86_FEATURE_PCID);
+-              }
+-      }
+-}
+-
+ /*
+  * Protection Keys are not available in 32-bit mode.
+  */
+@@ -1175,9 +1143,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
+       setup_smep(c);
+       setup_smap(c);
+ 
+-      /* Set up PCID */
+-      setup_pcid(c);
+-
+       /*
+        * The vendor-specific functions might have changed features.
+        * Now we do "generic changes."
+diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
+index d7e8b983aa72..f964bfddfefd 100644
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -1174,8 +1174,11 @@ void __init setup_arch(char **cmdline_p)
+        * with the current CR4 value.  This may not be necessary, but
+        * auditing all the early-boot CR4 manipulation would be needed to
+        * rule it out.
++       *
++       * Mask off features that don't work outside long mode (just
++       * PCIDE for now).
+        */
+-      mmu_cr4_features = __read_cr4();
++      mmu_cr4_features = __read_cr4() & ~X86_CR4_PCIDE;
+ 
+       memblock_set_current_limit(get_max_mapped());
+ 
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index 893fd8c849e2..d05006f6c31c 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -227,10 +227,12 @@ static int enable_start_cpu0;
+ static void notrace start_secondary(void *unused)
+ {
+       /*
+-       * Don't put *anything* before cpu_init(), SMP booting is too
+-       * fragile that we want to limit the things done here to the
+-       * most necessary things.
++       * Don't put *anything* except direct CPU state initialization
++       * before cpu_init(), SMP booting is too fragile that we want to
++       * limit the things done here to the most necessary things.
+        */
++      if (boot_cpu_has(X86_FEATURE_PCID))
++              __write_cr4(__read_cr4() | X86_CR4_PCIDE);
+       cpu_init();
+       x86_cpuinit.early_percpu_clock_init();
+       preempt_disable();
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index bf3f1065d6ad..df2624b091a7 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -19,6 +19,7 @@
+ #include <asm/microcode.h>
+ #include <asm/kaslr.h>
+ #include <asm/hypervisor.h>
++#include <asm/cpufeature.h>
+ 
+ /*
+  * We need to define the tracepoints somewhere, and tlb.c
+@@ -193,6 +194,38 @@ static void __init probe_page_size_mask(void)
+       }
+ }
+ 
++static void setup_pcid(void)
++{
++#ifdef CONFIG_X86_64
++      if (boot_cpu_has(X86_FEATURE_PCID)) {
++              if (boot_cpu_has(X86_FEATURE_PGE)) {
++                      /*
++                       * This can't be cr4_set_bits_and_update_boot() --
++                       * the trampoline code can't handle CR4.PCIDE and
++                       * it wouldn't do any good anyway.  Despite the name,
++                       * cr4_set_bits_and_update_boot() doesn't actually
++                       * cause the bits in question to remain set all the
++                       * way through the secondary boot asm.
++                       *
++                       * Instead, we brute-force it and set CR4.PCIDE
++                       * manually in start_secondary().
++                       */
++                      cr4_set_bits(X86_CR4_PCIDE);
++              } else {
++                      /*
++                       * flush_tlb_all(), as currently implemented, won't
++                       * work if PCID is on but PGE is not.  Since that
++                       * combination doesn't exist on real hardware, there's
++                       * no reason to try to fully support it, but it's
++                       * polite to avoid corrupting data if we're on
++                       * an improperly configured VM.
++                       */
++                      setup_clear_cpu_cap(X86_FEATURE_PCID);
++              }
++      }
++#endif
++}
++
+ #ifdef CONFIG_X86_32
+ #define NR_RANGE_MR 3
+ #else /* CONFIG_X86_64 */
+@@ -592,6 +625,7 @@ void __init init_mem_mapping(void)
+       unsigned long end;
+ 
+       probe_page_size_mask();
++      setup_pcid();
+ 
+ #ifdef CONFIG_X86_64
+       end = max_pfn << PAGE_SHIFT;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0036-objtool-Add-ORC-unwind-table-generation.patch b/patches/kernel/0036-objtool-Add-ORC-unwind-table-generation.patch

new file mode 100644 (file)

index 0000000..f4bce26
--- /dev/null
+++ b/patches/kernel/0036-objtool-Add-ORC-unwind-table-generation.patch
@@ -0,0 +1,1339 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Tue, 11 Jul 2017 10:33:42 -0500
+Subject: [PATCH] objtool: Add ORC unwind table generation
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Now that objtool knows the states of all registers on the stack for each
+instruction, it's straightforward to generate debuginfo for an unwinder
+to use.
+
+Instead of generating DWARF, generate a new format called ORC, which is
+more suitable for an in-kernel unwinder.  See
+Documentation/x86/orc-unwinder.txt for a more detailed description of
+this new debuginfo format and why it's preferable to DWARF.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: live-patching@vger.kernel.org
+Link: http://lkml.kernel.org/r/c9b9f01ba6c5ed2bdc9bb0957b78167fdbf9632e.1499786555.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 627fce14809ba5610b0cb476cd0186d3fcedecfc)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 9460f7766786ad0f8330f78f22b81842632a5398)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ tools/objtool/Documentation/stack-validation.txt |  56 ++----
+ tools/objtool/builtin.h                          |   1 +
+ tools/objtool/check.h                            |  15 +-
+ tools/objtool/elf.h                              |  15 +-
+ tools/objtool/orc.h                              |  30 ++++
+ tools/objtool/orc_types.h                        |  85 +++++++++
+ tools/objtool/builtin-check.c                    |   2 +-
+ tools/objtool/builtin-orc.c                      |  70 ++++++++
+ tools/objtool/check.c                            |  58 +++++-
+ tools/objtool/elf.c                              | 212 ++++++++++++++++++++--
+ tools/objtool/objtool.c                          |   3 +-
+ tools/objtool/orc_dump.c                         | 212 ++++++++++++++++++++++
+ tools/objtool/orc_gen.c                          | 214 +++++++++++++++++++++++
+ tools/objtool/Build                              |   3 +
+ 14 files changed, 916 insertions(+), 60 deletions(-)
+ create mode 100644 tools/objtool/orc.h
+ create mode 100644 tools/objtool/orc_types.h
+ create mode 100644 tools/objtool/builtin-orc.c
+ create mode 100644 tools/objtool/orc_dump.c
+ create mode 100644 tools/objtool/orc_gen.c
+
+diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
+index 17c1195f11f4..6a1af43862df 100644
+--- a/tools/objtool/Documentation/stack-validation.txt
++++ b/tools/objtool/Documentation/stack-validation.txt
+@@ -11,9 +11,6 @@ analyzes every .o file and ensures the validity of its stack metadata.
+ It enforces a set of rules on asm code and C inline assembly code so
+ that stack traces can be reliable.
+ 
+-Currently it only checks frame pointer usage, but there are plans to add
+-CFI validation for C files and CFI generation for asm files.
+-
+ For each function, it recursively follows all possible code paths and
+ validates the correct frame pointer state at each instruction.
+ 
+@@ -23,6 +20,10 @@ alternative execution paths to a given instruction (or set of
+ instructions).  Similarly, it knows how to follow switch statements, for
+ which gcc sometimes uses jump tables.
+ 
++(Objtool also has an 'orc generate' subcommand which generates debuginfo
++for the ORC unwinder.  See Documentation/x86/orc-unwinder.txt in the
++kernel tree for more details.)
++
+ 
+ Why do we need stack metadata validation?
+ -----------------------------------------
+@@ -93,37 +94,14 @@ a) More reliable stack traces for frame pointer enabled kernels
+        or at the very end of the function after the stack frame has been
+        destroyed.  This is an inherent limitation of frame pointers.
+ 
+-b) 100% reliable stack traces for DWARF enabled kernels
+-
+-   (NOTE: This is not yet implemented)
+-
+-   As an alternative to frame pointers, DWARF Call Frame Information
+-   (CFI) metadata can be used to walk the stack.  Unlike frame pointers,
+-   CFI metadata is out of band.  So it doesn't affect runtime
+-   performance and it can be reliable even when interrupts or exceptions
+-   are involved.
+-
+-   For C code, gcc automatically generates DWARF CFI metadata.  But for
+-   asm code, generating CFI is a tedious manual approach which requires
+-   manually placed .cfi assembler macros to be scattered throughout the
+-   code.  It's clumsy and very easy to get wrong, and it makes the real
+-   code harder to read.
+-
+-   Stacktool will improve this situation in several ways.  For code
+-   which already has CFI annotations, it will validate them.  For code
+-   which doesn't have CFI annotations, it will generate them.  So an
+-   architecture can opt to strip out all the manual .cfi annotations
+-   from their asm code and have objtool generate them instead.
++b) ORC (Oops Rewind Capability) unwind table generation
+ 
+-   We might also add a runtime stack validation debug option where we
+-   periodically walk the stack from schedule() and/or an NMI to ensure
+-   that the stack metadata is sane and that we reach the bottom of the
+-   stack.
++   An alternative to frame pointers and DWARF, ORC unwind data can be
++   used to walk the stack.  Unlike frame pointers, ORC data is out of
++   band.  So it doesn't affect runtime performance and it can be
++   reliable even when interrupts or exceptions are involved.
+ 
+-   So the benefit of objtool here will be that external tooling should
+-   always show perfect stack traces.  And the same will be true for
+-   kernel warning/oops traces if the architecture has a runtime DWARF
+-   unwinder.
++   For more details, see Documentation/x86/orc-unwinder.txt.
+ 
+ c) Higher live patching compatibility rate
+ 
+@@ -211,7 +189,7 @@ they mean, and suggestions for how to fix them.
+    function, add proper frame pointer logic using the FRAME_BEGIN and
+    FRAME_END macros.  Otherwise, if it's not a callable function, remove
+    its ELF function annotation by changing ENDPROC to END, and instead
+-   use the manual CFI hint macros in asm/undwarf.h.
++   use the manual unwind hint macros in asm/unwind_hints.h.
+ 
+    If it's a GCC-compiled .c file, the error may be because the function
+    uses an inline asm() statement which has a "call" instruction.  An
+@@ -231,8 +209,8 @@ they mean, and suggestions for how to fix them.
+    If the error is for an asm file, and the instruction is inside (or
+    reachable from) a callable function, the function should be annotated
+    with the ENTRY/ENDPROC macros (ENDPROC is the important one).
+-   Otherwise, the code should probably be annotated with the CFI hint
+-   macros in asm/undwarf.h so objtool and the unwinder can know the
++   Otherwise, the code should probably be annotated with the unwind hint
++   macros in asm/unwind_hints.h so objtool and the unwinder can know the
+    stack state associated with the code.
+ 
+    If you're 100% sure the code won't affect stack traces, or if you're
+@@ -258,7 +236,7 @@ they mean, and suggestions for how to fix them.
+    instructions aren't allowed in a callable function, and are most
+    likely part of the kernel entry code.  They should usually not have
+    the callable function annotation (ENDPROC) and should always be
+-   annotated with the CFI hint macros in asm/undwarf.h.
++   annotated with the unwind hint macros in asm/unwind_hints.h.
+ 
+ 
+ 6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame
+@@ -272,7 +250,7 @@ they mean, and suggestions for how to fix them.
+ 
+    If the instruction is not actually in a callable function (e.g.
+    kernel entry code), change ENDPROC to END and annotate manually with
+-   the CFI hint macros in asm/undwarf.h.
++   the unwind hint macros in asm/unwind_hints.h.
+ 
+ 
+ 7. file: warning: objtool: func()+0x5c: stack state mismatch
+@@ -288,8 +266,8 @@ they mean, and suggestions for how to fix them.
+ 
+    Another possibility is that the code has some asm or inline asm which
+    does some unusual things to the stack or the frame pointer.  In such
+-   cases it's probably appropriate to use the CFI hint macros in
+-   asm/undwarf.h.
++   cases it's probably appropriate to use the unwind hint macros in
++   asm/unwind_hints.h.
+ 
+ 
+ 8. file.o: warning: objtool: funcA() falls through to next function funcB()
+diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
+index 34d2ba78a616..dd526067fed5 100644
+--- a/tools/objtool/builtin.h
++++ b/tools/objtool/builtin.h
+@@ -18,5 +18,6 @@
+ #define _BUILTIN_H
+ 
+ extern int cmd_check(int argc, const char **argv);
++extern int cmd_orc(int argc, const char **argv);
+ 
+ #endif /* _BUILTIN_H */
+diff --git a/tools/objtool/check.h b/tools/objtool/check.h
+index da85f5b00ec6..046874bbe226 100644
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -22,12 +22,14 @@
+ #include "elf.h"
+ #include "cfi.h"
+ #include "arch.h"
++#include "orc.h"
+ #include <linux/hashtable.h>
+ 
+ struct insn_state {
+       struct cfi_reg cfa;
+       struct cfi_reg regs[CFI_NUM_REGS];
+       int stack_size;
++      unsigned char type;
+       bool bp_scratch;
+       bool drap;
+       int drap_reg;
+@@ -48,6 +50,7 @@ struct instruction {
+       struct symbol *func;
+       struct stack_op stack_op;
+       struct insn_state state;
++      struct orc_entry orc;
+ };
+ 
+ struct objtool_file {
+@@ -58,9 +61,19 @@ struct objtool_file {
+       bool ignore_unreachables, c_file;
+ };
+ 
+-int check(const char *objname, bool nofp);
++int check(const char *objname, bool nofp, bool orc);
++
++struct instruction *find_insn(struct objtool_file *file,
++                            struct section *sec, unsigned long offset);
+ 
+ #define for_each_insn(file, insn)                                     \
+       list_for_each_entry(insn, &file->insn_list, list)
+ 
++#define sec_for_each_insn(file, sec, insn)                            \
++      for (insn = find_insn(file, sec, 0);                            \
++           insn && &insn->list != &file->insn_list &&                 \
++                      insn->sec == sec;                               \
++           insn = list_next_entry(insn, list))
++
++
+ #endif /* _CHECK_H */
+diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
+index 343968b778cb..d86e2ff14466 100644
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -28,6 +28,13 @@
+ # define elf_getshdrstrndx elf_getshstrndx
+ #endif
+ 
++/*
++ * Fallback for systems without this "read, mmaping if possible" cmd.
++ */
++#ifndef ELF_C_READ_MMAP
++#define ELF_C_READ_MMAP ELF_C_READ
++#endif
++
+ struct section {
+       struct list_head list;
+       GElf_Shdr sh;
+@@ -41,6 +48,7 @@ struct section {
+       char *name;
+       int idx;
+       unsigned int len;
++      bool changed, text;
+ };
+ 
+ struct symbol {
+@@ -75,7 +83,7 @@ struct elf {
+ };
+ 
+ 
+-struct elf *elf_open(const char *name);
++struct elf *elf_open(const char *name, int flags);
+ struct section *find_section_by_name(struct elf *elf, const char *name);
+ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
+ struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
+@@ -83,6 +91,11 @@ struct rela *find_rela_by_dest(struct section *sec, unsigned long offset);
+ struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
+                                    unsigned int len);
+ struct symbol *find_containing_func(struct section *sec, unsigned long offset);
++struct section *elf_create_section(struct elf *elf, const char *name, size_t
++                                 entsize, int nr);
++struct section *elf_create_rela_section(struct elf *elf, struct section *base);
++int elf_rebuild_rela_section(struct section *sec);
++int elf_write(struct elf *elf);
+ void elf_close(struct elf *elf);
+ 
+ #define for_each_sec(file, sec)                                               \
+diff --git a/tools/objtool/orc.h b/tools/objtool/orc.h
+new file mode 100644
+index 000000000000..a4139e386ef3
+--- /dev/null
++++ b/tools/objtool/orc.h
+@@ -0,0 +1,30 @@
++/*
++ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version 2
++ * of the License, or (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, see <http://www.gnu.org/licenses/>.
++ */
++
++#ifndef _ORC_H
++#define _ORC_H
++
++#include "orc_types.h"
++
++struct objtool_file;
++
++int create_orc(struct objtool_file *file);
++int create_orc_sections(struct objtool_file *file);
++
++int orc_dump(const char *objname);
++
++#endif /* _ORC_H */
+diff --git a/tools/objtool/orc_types.h b/tools/objtool/orc_types.h
+new file mode 100644
+index 000000000000..fc5cf6cffd9a
+--- /dev/null
++++ b/tools/objtool/orc_types.h
+@@ -0,0 +1,85 @@
++/*
++ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version 2
++ * of the License, or (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, see <http://www.gnu.org/licenses/>.
++ */
++
++#ifndef _ORC_TYPES_H
++#define _ORC_TYPES_H
++
++#include <linux/types.h>
++#include <linux/compiler.h>
++
++/*
++ * The ORC_REG_* registers are base registers which are used to find other
++ * registers on the stack.
++ *
++ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
++ * address of the previous frame: the caller's SP before it called the current
++ * function.
++ *
++ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
++ * the current frame.
++ *
++ * The most commonly used base registers are SP and BP -- which the previous SP
++ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is
++ * usually based on.
++ *
++ * The rest of the base registers are needed for special cases like entry code
++ * and GCC realigned stacks.
++ */
++#define ORC_REG_UNDEFINED             0
++#define ORC_REG_PREV_SP                       1
++#define ORC_REG_DX                    2
++#define ORC_REG_DI                    3
++#define ORC_REG_BP                    4
++#define ORC_REG_SP                    5
++#define ORC_REG_R10                   6
++#define ORC_REG_R13                   7
++#define ORC_REG_BP_INDIRECT           8
++#define ORC_REG_SP_INDIRECT           9
++#define ORC_REG_MAX                   15
++
++/*
++ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
++ * caller's SP right before it made the call).  Used for all callable
++ * functions, i.e. all C code and all callable asm functions.
++ *
++ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
++ * to a fully populated pt_regs from a syscall, interrupt, or exception.
++ *
++ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
++ * points to the iret return frame.
++ */
++#define ORC_TYPE_CALL                 0
++#define ORC_TYPE_REGS                 1
++#define ORC_TYPE_REGS_IRET            2
++
++/*
++ * This struct is more or less a vastly simplified version of the DWARF Call
++ * Frame Information standard.  It contains only the necessary parts of DWARF
++ * CFI, simplified for ease of access by the in-kernel unwinder.  It tells the
++ * unwinder how to find the previous SP and BP (and sometimes entry regs) on
++ * the stack for a given code address.  Each instance of the struct corresponds
++ * to one or more code locations.
++ */
++struct orc_entry {
++      s16             sp_offset;
++      s16             bp_offset;
++      unsigned        sp_reg:4;
++      unsigned        bp_reg:4;
++      unsigned        type:2;
++} __packed;
++
++#endif /* _ORC_TYPES_H */
+diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
+index 365c34ecab26..eedf089b1495 100644
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -52,5 +52,5 @@ int cmd_check(int argc, const char **argv)
+ 
+       objname = argv[0];
+ 
+-      return check(objname, nofp);
++      return check(objname, nofp, false);
+ }
+diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
+new file mode 100644
+index 000000000000..5ca41ab0df48
+--- /dev/null
++++ b/tools/objtool/builtin-orc.c
+@@ -0,0 +1,70 @@
++/*
++ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version 2
++ * of the License, or (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, see <http://www.gnu.org/licenses/>.
++ */
++
++/*
++ * objtool orc:
++ *
++ * This command analyzes a .o file and adds .orc_unwind and .orc_unwind_ip
++ * sections to it, which are used by the in-kernel ORC unwinder.
++ *
++ * This command is a superset of "objtool check".
++ */
++
++#include <string.h>
++#include <subcmd/parse-options.h>
++#include "builtin.h"
++#include "check.h"
++
++
++static const char *orc_usage[] = {
++      "objtool orc generate [<options>] file.o",
++      "objtool orc dump file.o",
++      NULL,
++};
++
++extern const struct option check_options[];
++extern bool nofp;
++
++int cmd_orc(int argc, const char **argv)
++{
++      const char *objname;
++
++      /* Skip "orc"; bare "objtool orc" leaves no argv[0] to inspect. */
++      argc--; argv++;
++      if (argc > 0 && !strncmp(argv[0], "gen", 3)) {
++              argc = parse_options(argc, argv, check_options, orc_usage, 0);
++              if (argc != 1)
++                      usage_with_options(orc_usage, check_options);
++
++              objname = argv[0];
++
++              return check(objname, nofp, true);
++      }
++
++      if (argc > 0 && !strcmp(argv[0], "dump")) {
++              if (argc != 2)
++                      usage_with_options(orc_usage, check_options);
++
++              objname = argv[1];
++
++              return orc_dump(objname);
++      }
++
++      usage_with_options(orc_usage, check_options);
++
++      return 0;
++}
+diff --git a/tools/objtool/check.c b/tools/objtool/check.c
+index 2c6d74880403..cb57c526ba17 100644
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -36,8 +36,8 @@ const char *objname;
+ static bool nofp;
+ struct cfi_state initial_func_cfi;
+ 
+-static struct instruction *find_insn(struct objtool_file *file,
+-                                   struct section *sec, unsigned long offset)
++struct instruction *find_insn(struct objtool_file *file,
++                            struct section *sec, unsigned long offset)
+ {
+       struct instruction *insn;
+ 
+@@ -259,6 +259,11 @@ static int decode_instructions(struct objtool_file *file)
+               if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+                       continue;
+ 
++              if (strcmp(sec->name, ".altinstr_replacement") &&
++                  strcmp(sec->name, ".altinstr_aux") &&
++                  strncmp(sec->name, ".discard.", 9))
++                      sec->text = true;
++
+               for (offset = 0; offset < sec->len; offset += insn->len) {
+                       insn = malloc(sizeof(*insn));
+                       if (!insn) {
+@@ -947,6 +952,30 @@ static bool has_valid_stack_frame(struct insn_state *state)
+       return false;
+ }
+ 
++static int update_insn_state_regs(struct instruction *insn, struct insn_state *state)
++{
++      struct cfi_reg *cfa = &state->cfa;
++      struct stack_op *op = &insn->stack_op;
++
++      if (cfa->base != CFI_SP)
++              return 0;
++
++      /* push */
++      if (op->dest.type == OP_DEST_PUSH)
++              cfa->offset += 8;
++
++      /* pop */
++      if (op->src.type == OP_SRC_POP)
++              cfa->offset -= 8;
++
++      /* add immediate to sp */
++      if (op->dest.type == OP_DEST_REG && op->src.type == OP_SRC_ADD &&
++          op->dest.reg == CFI_SP && op->src.reg == CFI_SP)
++              cfa->offset -= op->src.offset;
++
++      return 0;
++}
++
+ static void save_reg(struct insn_state *state, unsigned char reg, int base,
+                    int offset)
+ {
+@@ -1032,6 +1061,9 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
+               return 0;
+       }
+ 
++      if (state->type == ORC_TYPE_REGS || state->type == ORC_TYPE_REGS_IRET)
++              return update_insn_state_regs(insn, state);
++
+       switch (op->dest.type) {
+ 
+       case OP_DEST_REG:
+@@ -1323,6 +1355,10 @@ static bool insn_state_match(struct instruction *insn, struct insn_state *state)
+                       break;
+               }
+ 
++      } else if (state1->type != state2->type) {
++              WARN_FUNC("stack state mismatch: type1=%d type2=%d",
++                        insn->sec, insn->offset, state1->type, state2->type);
++
+       } else if (state1->drap != state2->drap ||
+                (state1->drap && state1->drap_reg != state2->drap_reg)) {
+               WARN_FUNC("stack state mismatch: drap1=%d(%d) drap2=%d(%d)",
+@@ -1613,7 +1649,7 @@ static void cleanup(struct objtool_file *file)
+       elf_close(file->elf);
+ }
+ 
+-int check(const char *_objname, bool _nofp)
++int check(const char *_objname, bool _nofp, bool orc)
+ {
+       struct objtool_file file;
+       int ret, warnings = 0;
+@@ -1621,7 +1657,7 @@ int check(const char *_objname, bool _nofp)
+       objname = _objname;
+       nofp = _nofp;
+ 
+-      file.elf = elf_open(objname);
++      file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY);
+       if (!file.elf)
+               return 1;
+ 
+@@ -1654,6 +1690,20 @@ int check(const char *_objname, bool _nofp)
+               warnings += ret;
+       }
+ 
++      if (orc) {
++              ret = create_orc(&file);
++              if (ret < 0)
++                      goto out;
++
++              ret = create_orc_sections(&file);
++              if (ret < 0)
++                      goto out;
++
++              ret = elf_write(file.elf);
++              if (ret < 0)
++                      goto out;
++      }
++
+ out:
+       cleanup(&file);
+ 
+diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
+index 1a7e8aa2af58..6e9f980a7d26 100644
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -30,16 +30,6 @@
+ #include "elf.h"
+ #include "warn.h"
+ 
+-/*
+- * Fallback for systems without this "read, mmaping if possible" cmd.
+- */
+-#ifndef ELF_C_READ_MMAP
+-#define ELF_C_READ_MMAP ELF_C_READ
+-#endif
+-
+-#define WARN_ELF(format, ...)                                 \
+-      WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1))
+-
+ struct section *find_section_by_name(struct elf *elf, const char *name)
+ {
+       struct section *sec;
+@@ -349,9 +339,10 @@ static int read_relas(struct elf *elf)
+       return 0;
+ }
+ 
+-struct elf *elf_open(const char *name)
++struct elf *elf_open(const char *name, int flags)
+ {
+       struct elf *elf;
++      Elf_Cmd cmd;
+ 
+       elf_version(EV_CURRENT);
+ 
+@@ -364,13 +355,20 @@ struct elf *elf_open(const char *name)
+ 
+       INIT_LIST_HEAD(&elf->sections);
+ 
+-      elf->fd = open(name, O_RDONLY);
++      elf->fd = open(name, flags);
+       if (elf->fd == -1) {
+               perror("open");
+               goto err;
+       }
+ 
+-      elf->elf = elf_begin(elf->fd, ELF_C_READ_MMAP, NULL);
++      if ((flags & O_ACCMODE) == O_RDONLY)
++              cmd = ELF_C_READ_MMAP;
++      else if ((flags & O_ACCMODE) == O_RDWR)
++              cmd = ELF_C_RDWR;
++      else /* O_WRONLY */
++              cmd = ELF_C_WRITE;
++
++      elf->elf = elf_begin(elf->fd, cmd, NULL);
+       if (!elf->elf) {
+               WARN_ELF("elf_begin");
+               goto err;
+@@ -397,6 +395,194 @@ struct elf *elf_open(const char *name)
+       return NULL;
+ }
+ 
++struct section *elf_create_section(struct elf *elf, const char *name,
++                                 size_t entsize, int nr)
++{
++      struct section *sec, *shstrtab;
++      size_t size = entsize * nr;
++      struct Elf_Scn *s;
++      Elf_Data *data;
++
++      sec = malloc(sizeof(*sec));
++      if (!sec) {
++              perror("malloc");
++              return NULL;
++      }
++      memset(sec, 0, sizeof(*sec));
++
++      INIT_LIST_HEAD(&sec->symbol_list);
++      INIT_LIST_HEAD(&sec->rela_list);
++      hash_init(sec->rela_hash);
++      hash_init(sec->symbol_hash);
++
++      list_add_tail(&sec->list, &elf->sections);
++
++      s = elf_newscn(elf->elf);
++      if (!s) {
++              WARN_ELF("elf_newscn");
++              return NULL;
++      }
++
++      sec->name = strdup(name);
++      if (!sec->name) {
++              perror("strdup");
++              return NULL;
++      }
++
++      sec->idx = elf_ndxscn(s);
++      sec->len = size;
++      sec->changed = true;
++
++      sec->data = elf_newdata(s);
++      if (!sec->data) {
++              WARN_ELF("elf_newdata");
++              return NULL;
++      }
++
++      sec->data->d_size = size;
++      sec->data->d_align = 1;
++
++      if (size) {
++              sec->data->d_buf = malloc(size);
++              if (!sec->data->d_buf) {
++                      perror("malloc");
++                      return NULL;
++              }
++              memset(sec->data->d_buf, 0, size);
++      }
++
++      if (!gelf_getshdr(s, &sec->sh)) {
++              WARN_ELF("gelf_getshdr");
++              return NULL;
++      }
++
++      sec->sh.sh_size = size;
++      sec->sh.sh_entsize = entsize;
++      sec->sh.sh_type = SHT_PROGBITS;
++      sec->sh.sh_addralign = 1;
++      sec->sh.sh_flags = SHF_ALLOC;
++
++
++      /* Add section name to .shstrtab */
++      shstrtab = find_section_by_name(elf, ".shstrtab");
++      if (!shstrtab) {
++              WARN("can't find .shstrtab section");
++              return NULL;
++      }
++
++      s = elf_getscn(elf->elf, shstrtab->idx);
++      if (!s) {
++              WARN_ELF("elf_getscn");
++              return NULL;
++      }
++
++      data = elf_newdata(s);
++      if (!data) {
++              WARN_ELF("elf_newdata");
++              return NULL;
++      }
++
++      data->d_buf = sec->name;
++      data->d_size = strlen(name) + 1;
++      data->d_align = 1;
++
++      sec->sh.sh_name = shstrtab->len;
++
++      shstrtab->len += strlen(name) + 1;
++      shstrtab->changed = true;
++
++      return sec;
++}
++
++/* Create the (initially empty) rela section for @base; NULL on failure. */
++struct section *elf_create_rela_section(struct elf *elf, struct section *base)
++{
++      char *relaname;
++      struct section *sec;
++
++      relaname = malloc(strlen(base->name) + strlen(".rela") + 1);
++      if (!relaname) {
++              perror("malloc");
++              return NULL;
++      }
++      strcpy(relaname, ".rela");
++      strcat(relaname, base->name);
++      sec = elf_create_section(elf, relaname, sizeof(GElf_Rela), 0);
++      free(relaname); /* elf_create_section() strdup()s the name */
++      if (!sec)
++              return NULL;
++
++      base->rela = sec;
++      sec->base = base;
++
++      sec->sh.sh_type = SHT_RELA;
++      sec->sh.sh_addralign = 8;
++      sec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
++      sec->sh.sh_info = base->idx;
++      sec->sh.sh_flags = SHF_INFO_LINK;
++
++      return sec;
++}
++
++int elf_rebuild_rela_section(struct section *sec)
++{
++      struct rela *rela;
++      int nr, idx = 0, size;
++      GElf_Rela *relas;
++
++      nr = 0;
++      list_for_each_entry(rela, &sec->rela_list, list)
++              nr++;
++
++      size = nr * sizeof(*relas);
++      relas = malloc(size);
++      if (!relas) {
++              perror("malloc");
++              return -1;
++      }
++
++      sec->data->d_buf = relas;
++      sec->data->d_size = size;
++
++      sec->sh.sh_size = size;
++
++      idx = 0;
++      list_for_each_entry(rela, &sec->rela_list, list) {
++              relas[idx].r_offset = rela->offset;
++              relas[idx].r_addend = rela->addend;
++              relas[idx].r_info = GELF_R_INFO(rela->sym->idx, rela->type);
++              idx++;
++      }
++
++      return 0;
++}
++
++int elf_write(struct elf *elf)
++{
++      struct section *sec;
++      Elf_Scn *s;
++
++      list_for_each_entry(sec, &elf->sections, list) {
++              if (sec->changed) {
++                      s = elf_getscn(elf->elf, sec->idx);
++                      if (!s) {
++                              WARN_ELF("elf_getscn");
++                              return -1;
++                      }
++                      if (!gelf_update_shdr (s, &sec->sh)) {
++                              WARN_ELF("gelf_update_shdr");
++                              return -1;
++                      }
++              }
++      }
++
++      if (elf_update(elf->elf, ELF_C_WRITE) < 0) {
++              WARN_ELF("elf_update");
++              return -1;
++      }
++
++      return 0;
++}
++
+ void elf_close(struct elf *elf)
+ {
+       struct section *sec, *tmpsec;
+diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
+index ecc5b1b5d15d..31e0f9143840 100644
+--- a/tools/objtool/objtool.c
++++ b/tools/objtool/objtool.c
+@@ -42,10 +42,11 @@ struct cmd_struct {
+ };
+ 
+ static const char objtool_usage_string[] =
+-      "objtool [OPTIONS] COMMAND [ARGS]";
++      "objtool COMMAND [ARGS]";
+ 
+ static struct cmd_struct objtool_cmds[] = {
+       {"check",       cmd_check,      "Perform stack metadata validation on an object file" },
++      {"orc",         cmd_orc,        "Generate in-place ORC unwind tables for an object file" },
+ };
+ 
+ bool help;
+diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
+new file mode 100644
+index 000000000000..36c5bf6a2675
+--- /dev/null
++++ b/tools/objtool/orc_dump.c
+@@ -0,0 +1,212 @@
++/*
++ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version 2
++ * of the License, or (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, see <http://www.gnu.org/licenses/>.
++ */
++
++#include <unistd.h>
++#include "orc.h"
++#include "warn.h"
++
++/*
++ * Translate an ORC_REG_* base register number into the short name used in
++ * the dump output.  Values without a name of their own yield "?".
++ */
++static const char *reg_name(unsigned int reg)
++{
++      static const char * const names[] = {
++              [ORC_REG_PREV_SP]       = "prevsp",
++              [ORC_REG_DX]            = "dx",
++              [ORC_REG_DI]            = "di",
++              [ORC_REG_BP]            = "bp",
++              [ORC_REG_SP]            = "sp",
++              [ORC_REG_R10]           = "r10",
++              [ORC_REG_R13]           = "r13",
++              [ORC_REG_BP_INDIRECT]   = "bp(ind)",
++              [ORC_REG_SP_INDIRECT]   = "sp(ind)",
++      };
++      const char *name = NULL;
++
++      if (reg < sizeof(names) / sizeof(names[0]))
++              name = names[reg];
++
++      /* unlisted slots in the table are NULL, same as out-of-range values */
++      return name ? name : "?";
++}
++
++/* Return the printable name of an ORC_TYPE_* value, or "?" if it has none. */
++static const char *orc_type_name(unsigned int type)
++{
++      if (type == ORC_TYPE_CALL)
++              return "call";
++
++      if (type == ORC_TYPE_REGS)
++              return "regs";
++
++      if (type == ORC_TYPE_REGS_IRET)
++              return "iret";
++
++      return "?";
++}
++
++/*
++ * Print one base register together with its signed offset.  The two
++ * indirect bases are printed in parentheses, and an undefined base is
++ * printed as "(und)" with no offset.
++ */
++static void print_reg(unsigned int reg, int offset)
++{
++      switch (reg) {
++      case ORC_REG_UNDEFINED:
++              printf("(und)");
++              break;
++      case ORC_REG_BP_INDIRECT:
++              printf("(bp%+d)", offset);
++              break;
++      case ORC_REG_SP_INDIRECT:
++              printf("(sp%+d)", offset);
++              break;
++      default:
++              printf("%s%+d", reg_name(reg), offset);
++              break;
++      }
++}
++
++/*
++ * Print the ORC unwind table of an object file to stdout.
++ *
++ * The file is scanned for .symtab, .orc_unwind, .orc_unwind_ip and
++ * .rela.orc_unwind_ip.  One line is printed per .orc_unwind entry.  When the
++ * rela section is present the address is shown as <section>+<addend>, taken
++ * from the relocation; otherwise it is computed from the .orc_unwind_ip
++ * section address and the stored offset.
++ *
++ * Returns 0 on success -- including the case where the file has no symbol
++ * table or no ORC sections, in which case nothing is printed -- and -1 on
++ * error.  The file descriptor and the libelf handle are released on every
++ * path.
++ */
++int orc_dump(const char *_objname)
++{
++      int fd, nr_entries, i, *orc_ip = NULL, orc_size = 0, ret = -1;
++      struct orc_entry *orc = NULL;
++      char *name;
++      size_t nr_sections, sec_idx, shstrtab_idx;
++      unsigned long orc_ip_addr = 0;
++      Elf *elf;
++      Elf_Scn *scn;
++      GElf_Shdr sh;
++      GElf_Rela rela;
++      GElf_Sym sym;
++      Elf_Data *data, *symtab = NULL, *rela_orc_ip = NULL;
++
++      objname = _objname;
++
++      elf_version(EV_CURRENT);
++
++      fd = open(objname, O_RDONLY);
++      if (fd == -1) {
++              perror("open");
++              return -1;
++      }
++
++      elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
++      if (!elf) {
++              WARN_ELF("elf_begin");
++              goto out_close;
++      }
++
++      if (elf_getshdrnum(elf, &nr_sections)) {
++              WARN_ELF("elf_getshdrnum");
++              goto out_end;
++      }
++
++      if (elf_getshdrstrndx(elf, &shstrtab_idx)) {
++              WARN_ELF("elf_getshdrstrndx");
++              goto out_end;
++      }
++
++      /* remember the data of the sections needed for the dump */
++      for (sec_idx = 0; sec_idx < nr_sections; sec_idx++) {
++              scn = elf_getscn(elf, sec_idx);
++              if (!scn) {
++                      WARN_ELF("elf_getscn");
++                      goto out_end;
++              }
++
++              if (!gelf_getshdr(scn, &sh)) {
++                      WARN_ELF("gelf_getshdr");
++                      goto out_end;
++              }
++
++              name = elf_strptr(elf, shstrtab_idx, sh.sh_name);
++              if (!name) {
++                      WARN_ELF("elf_strptr");
++                      goto out_end;
++              }
++
++              data = elf_getdata(scn, NULL);
++              if (!data) {
++                      WARN_ELF("elf_getdata");
++                      goto out_end;
++              }
++
++              if (!strcmp(name, ".symtab")) {
++                      symtab = data;
++              } else if (!strcmp(name, ".orc_unwind")) {
++                      orc = data->d_buf;
++                      orc_size = sh.sh_size;
++              } else if (!strcmp(name, ".orc_unwind_ip")) {
++                      orc_ip = data->d_buf;
++                      orc_ip_addr = sh.sh_addr;
++              } else if (!strcmp(name, ".rela.orc_unwind_ip")) {
++                      rela_orc_ip = data;
++              }
++      }
++
++      if (!symtab || !orc || !orc_ip) {
++              /* nothing to dump; this is not an error */
++              ret = 0;
++              goto out_end;
++      }
++
++      if (orc_size % sizeof(*orc) != 0) {
++              WARN("bad .orc_unwind section size");
++              goto out_end;
++      }
++
++      nr_entries = orc_size / sizeof(*orc);
++      for (i = 0; i < nr_entries; i++) {
++              if (rela_orc_ip) {
++                      if (!gelf_getrela(rela_orc_ip, i, &rela)) {
++                              WARN_ELF("gelf_getrela");
++                              goto out_end;
++                      }
++
++                      if (!gelf_getsym(symtab, GELF_R_SYM(rela.r_info), &sym)) {
++                              WARN_ELF("gelf_getsym");
++                              goto out_end;
++                      }
++
++                      scn = elf_getscn(elf, sym.st_shndx);
++                      if (!scn) {
++                              WARN_ELF("elf_getscn");
++                              goto out_end;
++                      }
++
++                      if (!gelf_getshdr(scn, &sh)) {
++                              WARN_ELF("gelf_getshdr");
++                              goto out_end;
++                      }
++
++                      name = elf_strptr(elf, shstrtab_idx, sh.sh_name);
++                      if (!name || !*name) {
++                              WARN_ELF("elf_strptr");
++                              goto out_end;
++                      }
++
++                      /* r_addend is 64 bits wide whatever the host's long is */
++                      printf("%s+%llx:", name,
++                             (unsigned long long)rela.r_addend);
++
++              } else {
++                      printf("%llx:", (unsigned long long)
++                             (orc_ip_addr + (i * sizeof(int)) + orc_ip[i]));
++              }
++
++              printf(" sp:");
++              print_reg(orc[i].sp_reg, orc[i].sp_offset);
++
++              printf(" bp:");
++              print_reg(orc[i].bp_reg, orc[i].bp_offset);
++
++              printf(" type:%s\n", orc_type_name(orc[i].type));
++      }
++
++      ret = 0;
++
++out_end:
++      elf_end(elf);
++out_close:
++      close(fd);
++
++      return ret;
++}
+diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
+new file mode 100644
+index 000000000000..e5ca31429c9b
+--- /dev/null
++++ b/tools/objtool/orc_gen.c
+@@ -0,0 +1,214 @@
++/*
++ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version 2
++ * of the License, or (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, see <http://www.gnu.org/licenses/>.
++ */
++
++#include <stdlib.h>
++#include <string.h>
++
++#include "orc.h"
++#include "check.h"
++#include "warn.h"
++
++/*
++ * Fill in insn->orc for every instruction from the CFI state recorded in
++ * insn->state.
++ *
++ * An instruction whose CFA base is CFI_UNDEFINED only gets sp_reg set to
++ * ORC_REG_UNDEFINED; the remaining fields of its entry are not written here.
++ *
++ * Returns 0 on success, or -1 (after a warning) when a CFA or BP base
++ * register has no ORC equivalent.
++ */
++int create_orc(struct objtool_file *file)
++{
++      struct instruction *insn;
++
++      for_each_insn(file, insn) {
++              struct cfi_reg *bp = &insn->state.regs[CFI_BP];
++              struct cfi_reg *cfa = &insn->state.cfa;
++              struct orc_entry *entry = &insn->orc;
++
++              /* base register used to find the previous stack pointer */
++              switch (cfa->base) {
++              case CFI_UNDEFINED:
++                      entry->sp_reg = ORC_REG_UNDEFINED;
++                      continue;       /* on to the next instruction */
++              case CFI_SP:
++                      entry->sp_reg = ORC_REG_SP;
++                      break;
++              case CFI_SP_INDIRECT:
++                      entry->sp_reg = ORC_REG_SP_INDIRECT;
++                      break;
++              case CFI_BP:
++                      entry->sp_reg = ORC_REG_BP;
++                      break;
++              case CFI_BP_INDIRECT:
++                      entry->sp_reg = ORC_REG_BP_INDIRECT;
++                      break;
++              case CFI_R10:
++                      entry->sp_reg = ORC_REG_R10;
++                      break;
++              case CFI_R13:
++                      entry->sp_reg = ORC_REG_R13;
++                      break;
++              case CFI_DI:
++                      entry->sp_reg = ORC_REG_DI;
++                      break;
++              case CFI_DX:
++                      entry->sp_reg = ORC_REG_DX;
++                      break;
++              default:
++                      WARN_FUNC("unknown CFA base reg %d",
++                                insn->sec, insn->offset, cfa->base);
++                      return -1;
++              }
++
++              /* base register used to find the previous frame pointer */
++              switch (bp->base) {
++              case CFI_UNDEFINED:
++                      entry->bp_reg = ORC_REG_UNDEFINED;
++                      break;
++              case CFI_CFA:
++                      entry->bp_reg = ORC_REG_PREV_SP;
++                      break;
++              case CFI_BP:
++                      entry->bp_reg = ORC_REG_BP;
++                      break;
++              default:
++                      WARN_FUNC("unknown BP base reg %d",
++                                insn->sec, insn->offset, bp->base);
++                      return -1;
++              }
++
++              entry->type = insn->state.type;
++              entry->bp_offset = bp->offset;
++              entry->sp_offset = cfa->offset;
++      }
++
++      return 0;
++}
++
++/*
++ * Store one ORC entry at index idx of the .orc_unwind data in u_sec and queue
++ * the matching relocation on ip_relasec.
++ *
++ * The relocation is R_X86_64_PC32 against insn_sec's symbol with addend
++ * insn_off, placed at byte offset idx * sizeof(int).
++ *
++ * Returns 0 on success, -1 if the relocation cannot be allocated.
++ */
++static int create_orc_entry(struct section *u_sec, struct section *ip_relasec,
++                              unsigned int idx, struct section *insn_sec,
++                              unsigned long insn_off, struct orc_entry *o)
++{
++      struct orc_entry *table = u_sec->data->d_buf;
++      struct rela *reloc;
++
++      /* populate ORC data */
++      memcpy(&table[idx], o, sizeof(table[idx]));
++
++      /* populate rela for ip; calloc() hands back a zeroed object */
++      reloc = calloc(1, sizeof(*reloc));
++      if (!reloc) {
++              perror("malloc");
++              return -1;
++      }
++
++      reloc->offset = idx * sizeof(int);
++      reloc->type = R_X86_64_PC32;
++      reloc->addend = insn_off;
++      reloc->sym = insn_sec->sym;
++
++      list_add_tail(&reloc->list, &ip_relasec->rela_list);
++      hash_add(ip_relasec->rela_hash, &reloc->hash, reloc->offset);
++
++      return 0;
++}
++
++/*
++ * Create the .orc_unwind, .orc_unwind_ip and .rela.orc_unwind_ip sections
++ * and fill them from the per-instruction ORC data.
++ *
++ * For each text section, an entry is emitted for the first instruction and
++ * for every instruction whose ORC data differs from that of the instruction
++ * before it.  A final "empty" entry is then emitted at the end of the last
++ * instruction to terminate the section.
++ *
++ * Returns 0 on success.  Returns -1 if the file already has a .orc_unwind
++ * section, if no entries are needed, or if a section or entry cannot be
++ * created.
++ */
++int create_orc_sections(struct objtool_file *file)
++{
++      struct instruction *insn, *prev_insn;
++      struct section *sec, *ip_sec, *u_sec, *ip_relasec;
++      unsigned int idx;
++
++      struct orc_entry empty = {
++              .sp_reg = ORC_REG_UNDEFINED,
++              .bp_reg  = ORC_REG_UNDEFINED,
++              .type    = ORC_TYPE_CALL,
++      };
++
++      sec = find_section_by_name(file->elf, ".orc_unwind");
++      if (sec) {
++              WARN("file already has .orc_unwind section, skipping");
++              return -1;
++      }
++
++      /* count the number of needed orcs */
++      idx = 0;
++      for_each_sec(file, sec) {
++              if (!sec->text)
++                      continue;
++
++              prev_insn = NULL;
++              sec_for_each_insn(file, sec, insn) {
++                      if (!prev_insn ||
++                          memcmp(&insn->orc, &prev_insn->orc,
++                                 sizeof(struct orc_entry))) {
++                              idx++;
++                      }
++                      prev_insn = insn;
++              }
++
++              /* section terminator */
++              if (prev_insn)
++                      idx++;
++      }
++      if (!idx)
++              return -1;
++
++      /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
++      ip_sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
++      if (!ip_sec)
++              return -1;
++
++      ip_relasec = elf_create_rela_section(file->elf, ip_sec);
++      if (!ip_relasec)
++              return -1;
++
++      /*
++       * create .orc_unwind section
++       *
++       * Its data buffer is written through u_sec when the entries are
++       * populated, so a failed creation must not get that far.
++       */
++      u_sec = elf_create_section(file->elf, ".orc_unwind",
++                                 sizeof(struct orc_entry), idx);
++      if (!u_sec)
++              return -1;
++
++      /* populate sections */
++      idx = 0;
++      for_each_sec(file, sec) {
++              if (!sec->text)
++                      continue;
++
++              prev_insn = NULL;
++              sec_for_each_insn(file, sec, insn) {
++                      if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc,
++                                               sizeof(struct orc_entry))) {
++
++                              if (create_orc_entry(u_sec, ip_relasec, idx,
++                                                   insn->sec, insn->offset,
++                                                   &insn->orc))
++                                      return -1;
++
++                              idx++;
++                      }
++                      prev_insn = insn;
++              }
++
++              /* section terminator */
++              if (prev_insn) {
++                      if (create_orc_entry(u_sec, ip_relasec, idx,
++                                           prev_insn->sec,
++                                           prev_insn->offset + prev_insn->len,
++                                           &empty))
++                              return -1;
++
++                      idx++;
++              }
++      }
++
++      if (elf_rebuild_rela_section(ip_relasec))
++              return -1;
++
++      return 0;
++}
+diff --git a/tools/objtool/Build b/tools/objtool/Build
+index 6f2e1987c4d9..749becdf5b90 100644
+--- a/tools/objtool/Build
++++ b/tools/objtool/Build
+@@ -1,6 +1,9 @@
+ objtool-y += arch/$(SRCARCH)/
+ objtool-y += builtin-check.o
++objtool-y += builtin-orc.o
+ objtool-y += check.o
++objtool-y += orc_gen.o
++objtool-y += orc_dump.o
+ objtool-y += elf.o
+ objtool-y += special.o
+ objtool-y += objtool.o
+-- 
+2.14.2
+
diff --git a/patches/kernel/0036-objtool-x86-Add-facility-for-asm-code-to-provide-unw.patch b/patches/kernel/0036-objtool-x86-Add-facility-for-asm-code-to-provide-unw.patch

deleted file mode 100644 (file)

index 3c4000c..0000000
--- a/patches/kernel/0036-objtool-x86-Add-facility-for-asm-code-to-provide-unw.patch
+++ /dev/null
@@ -1,641 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Tue, 11 Jul 2017 10:33:43 -0500
-Subject: [PATCH] objtool, x86: Add facility for asm code to provide unwind
- hints
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Some asm (and inline asm) code does special things to the stack which
-objtool can't understand.  (Nor can GCC or GNU assembler, for that
-matter.)  In such cases we need a facility for the code to provide
-annotations, so the unwinder can unwind through it.
-
-This provides such a facility, in the form of unwind hints.  They're
-similar to the GNU assembler .cfi* directives, but they give more
-information, and are needed in far fewer places, because objtool can
-fill in the blanks by following branches and adjusting the stack pointer
-for pushes and pops.
-
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mike Galbraith <efault@gmx.de>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: live-patching@vger.kernel.org
-Link: http://lkml.kernel.org/r/0f5f3c9104fca559ff4088bece1d14ae3bca52d5.1499786555.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 39358a033b2e4432052265c1fa0f36f572d8cfb5)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit a1fed2e10e84d48643a09861c2d127968621813e)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- tools/objtool/Makefile              |   3 +
- arch/x86/include/asm/orc_types.h    | 107 ++++++++++++++++++++
- arch/x86/include/asm/unwind_hints.h | 103 +++++++++++++++++++
- tools/objtool/check.h               |   4 +-
- tools/objtool/orc_types.h           |  22 +++++
- tools/objtool/check.c               | 191 +++++++++++++++++++++++++++++++++---
- 6 files changed, 417 insertions(+), 13 deletions(-)
- create mode 100644 arch/x86/include/asm/orc_types.h
- create mode 100644 arch/x86/include/asm/unwind_hints.h
-
-diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
-index 0e2765e243c0..3a6425fefc43 100644
---- a/tools/objtool/Makefile
-+++ b/tools/objtool/Makefile
-@@ -52,6 +52,9 @@ $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
-       diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \
-       diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \
-       || echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true
-+      @(test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \
-+      diff ../../arch/x86/include/asm/orc_types.h orc_types.h >/dev/null) \
-+      || echo "warning: objtool: orc_types.h differs from kernel" >&2 )) || true
-       $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
- 
- 
-diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
-new file mode 100644
-index 000000000000..7dc777a6cb40
---- /dev/null
-+++ b/arch/x86/include/asm/orc_types.h
-@@ -0,0 +1,107 @@
-+/*
-+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License
-+ * as published by the Free Software Foundation; either version 2
-+ * of the License, or (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
-+ */
-+
-+#ifndef _ORC_TYPES_H
-+#define _ORC_TYPES_H
-+
-+#include <linux/types.h>
-+#include <linux/compiler.h>
-+
-+/*
-+ * The ORC_REG_* registers are base registers which are used to find other
-+ * registers on the stack.
-+ *
-+ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
-+ * address of the previous frame: the caller's SP before it called the current
-+ * function.
-+ *
-+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
-+ * the current frame.
-+ *
-+ * The most commonly used base registers are SP and BP -- which the previous SP
-+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is
-+ * usually based on.
-+ *
-+ * The rest of the base registers are needed for special cases like entry code
-+ * and GCC realigned stacks.
-+ */
-+#define ORC_REG_UNDEFINED             0
-+#define ORC_REG_PREV_SP                       1
-+#define ORC_REG_DX                    2
-+#define ORC_REG_DI                    3
-+#define ORC_REG_BP                    4
-+#define ORC_REG_SP                    5
-+#define ORC_REG_R10                   6
-+#define ORC_REG_R13                   7
-+#define ORC_REG_BP_INDIRECT           8
-+#define ORC_REG_SP_INDIRECT           9
-+#define ORC_REG_MAX                   15
-+
-+/*
-+ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
-+ * caller's SP right before it made the call).  Used for all callable
-+ * functions, i.e. all C code and all callable asm functions.
-+ *
-+ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
-+ * to a fully populated pt_regs from a syscall, interrupt, or exception.
-+ *
-+ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
-+ * points to the iret return frame.
-+ *
-+ * The UNWIND_HINT macros are used only for the unwind_hint struct.  They
-+ * aren't used in struct orc_entry due to size and complexity constraints.
-+ * Objtool converts them to real types when it converts the hints to orc
-+ * entries.
-+ */
-+#define ORC_TYPE_CALL                 0
-+#define ORC_TYPE_REGS                 1
-+#define ORC_TYPE_REGS_IRET            2
-+#define UNWIND_HINT_TYPE_SAVE         3
-+#define UNWIND_HINT_TYPE_RESTORE      4
-+
-+#ifndef __ASSEMBLY__
-+/*
-+ * This struct is more or less a vastly simplified version of the DWARF Call
-+ * Frame Information standard.  It contains only the necessary parts of DWARF
-+ * CFI, simplified for ease of access by the in-kernel unwinder.  It tells the
-+ * unwinder how to find the previous SP and BP (and sometimes entry regs) on
-+ * the stack for a given code address.  Each instance of the struct corresponds
-+ * to one or more code locations.
-+ */
-+struct orc_entry {
-+      s16             sp_offset;
-+      s16             bp_offset;
-+      unsigned        sp_reg:4;
-+      unsigned        bp_reg:4;
-+      unsigned        type:2;
-+} __packed;
-+
-+/*
-+ * This struct is used by asm and inline asm code to manually annotate the
-+ * location of registers on the stack for the ORC unwinder.
-+ *
-+ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
-+ */
-+struct unwind_hint {
-+      u32             ip;
-+      s16             sp_offset;
-+      u8              sp_reg;
-+      u8              type;
-+};
-+#endif /* __ASSEMBLY__ */
-+
-+#endif /* _ORC_TYPES_H */
-diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
-new file mode 100644
-index 000000000000..5e02b11c9b86
---- /dev/null
-+++ b/arch/x86/include/asm/unwind_hints.h
-@@ -0,0 +1,103 @@
-+#ifndef _ASM_X86_UNWIND_HINTS_H
-+#define _ASM_X86_UNWIND_HINTS_H
-+
-+#include "orc_types.h"
-+
-+#ifdef __ASSEMBLY__
-+
-+/*
-+ * In asm, there are two kinds of code: normal C-type callable functions and
-+ * the rest.  The normal callable functions can be called by other code, and
-+ * don't do anything unusual with the stack.  Such normal callable functions
-+ * are annotated with the ENTRY/ENDPROC macros.  Most asm code falls in this
-+ * category.  In this case, no special debugging annotations are needed because
-+ * objtool can automatically generate the ORC data for the ORC unwinder to read
-+ * at runtime.
-+ *
-+ * Anything which doesn't fall into the above category, such as syscall and
-+ * interrupt handlers, tends to not be called directly by other functions, and
-+ * often does unusual non-C-function-type things with the stack pointer.  Such
-+ * code needs to be annotated such that objtool can understand it.  The
-+ * following CFI hint macros are for this type of code.
-+ *
-+ * These macros provide hints to objtool about the state of the stack at each
-+ * instruction.  Objtool starts from the hints and follows the code flow,
-+ * making automatic CFI adjustments when it sees pushes and pops, filling out
-+ * the debuginfo as necessary.  It will also warn if it sees any
-+ * inconsistencies.
-+ */
-+.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL
-+#ifdef CONFIG_STACK_VALIDATION
-+.Lunwind_hint_ip_\@:
-+      .pushsection .discard.unwind_hints
-+              /* struct unwind_hint */
-+              .long .Lunwind_hint_ip_\@ - .
-+              .short \sp_offset
-+              .byte \sp_reg
-+              .byte \type
-+      .popsection
-+#endif
-+.endm
-+
-+.macro UNWIND_HINT_EMPTY
-+      UNWIND_HINT sp_reg=ORC_REG_UNDEFINED
-+.endm
-+
-+.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0
-+      .if \base == %rsp && \indirect
-+              .set sp_reg, ORC_REG_SP_INDIRECT
-+      .elseif \base == %rsp
-+              .set sp_reg, ORC_REG_SP
-+      .elseif \base == %rbp
-+              .set sp_reg, ORC_REG_BP
-+      .elseif \base == %rdi
-+              .set sp_reg, ORC_REG_DI
-+      .elseif \base == %rdx
-+              .set sp_reg, ORC_REG_DX
-+      .elseif \base == %r10
-+              .set sp_reg, ORC_REG_R10
-+      .else
-+              .error "UNWIND_HINT_REGS: bad base register"
-+      .endif
-+
-+      .set sp_offset, \offset
-+
-+      .if \iret
-+              .set type, ORC_TYPE_REGS_IRET
-+      .elseif \extra == 0
-+              .set type, ORC_TYPE_REGS_IRET
-+              .set sp_offset, \offset + (16*8)
-+      .else
-+              .set type, ORC_TYPE_REGS
-+      .endif
-+
-+      UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type
-+.endm
-+
-+.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0
-+      UNWIND_HINT_REGS base=\base offset=\offset iret=1
-+.endm
-+
-+.macro UNWIND_HINT_FUNC sp_offset=8
-+      UNWIND_HINT sp_offset=\sp_offset
-+.endm
-+
-+#else /* !__ASSEMBLY__ */
-+
-+#define UNWIND_HINT(sp_reg, sp_offset, type)                  \
-+      "987: \n\t"                                             \
-+      ".pushsection .discard.unwind_hints\n\t"                \
-+      /* struct unwind_hint */                                \
-+      ".long 987b - .\n\t"                                    \
-+      ".short " __stringify(sp_offset) "\n\t"         \
-+      ".byte " __stringify(sp_reg) "\n\t"                     \
-+      ".byte " __stringify(type) "\n\t"                       \
-+      ".popsection\n\t"
-+
-+#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE)
-+
-+#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE)
-+
-+#endif /* __ASSEMBLY__ */
-+
-+#endif /* _ASM_X86_UNWIND_HINTS_H */
-diff --git a/tools/objtool/check.h b/tools/objtool/check.h
-index 046874bbe226..ac3d4b13f17b 100644
---- a/tools/objtool/check.h
-+++ b/tools/objtool/check.h
-@@ -43,7 +43,7 @@ struct instruction {
-       unsigned int len;
-       unsigned char type;
-       unsigned long immediate;
--      bool alt_group, visited, dead_end, ignore;
-+      bool alt_group, visited, dead_end, ignore, hint, save, restore;
-       struct symbol *call_dest;
-       struct instruction *jump_dest;
-       struct list_head alts;
-@@ -58,7 +58,7 @@ struct objtool_file {
-       struct list_head insn_list;
-       DECLARE_HASHTABLE(insn_hash, 16);
-       struct section *rodata, *whitelist;
--      bool ignore_unreachables, c_file;
-+      bool ignore_unreachables, c_file, hints;
- };
- 
- int check(const char *objname, bool nofp, bool orc);
-diff --git a/tools/objtool/orc_types.h b/tools/objtool/orc_types.h
-index fc5cf6cffd9a..9c9dc579bd7d 100644
---- a/tools/objtool/orc_types.h
-+++ b/tools/objtool/orc_types.h
-@@ -61,11 +61,19 @@
-  *
-  * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
-  * points to the iret return frame.
-+ *
-+ * The UNWIND_HINT macros are used only for the unwind_hint struct.  They
-+ * aren't used in struct orc_entry due to size and complexity constraints.
-+ * Objtool converts them to real types when it converts the hints to orc
-+ * entries.
-  */
- #define ORC_TYPE_CALL                 0
- #define ORC_TYPE_REGS                 1
- #define ORC_TYPE_REGS_IRET            2
-+#define UNWIND_HINT_TYPE_SAVE         3
-+#define UNWIND_HINT_TYPE_RESTORE      4
- 
-+#ifndef __ASSEMBLY__
- /*
-  * This struct is more or less a vastly simplified version of the DWARF Call
-  * Frame Information standard.  It contains only the necessary parts of DWARF
-@@ -82,4 +90,18 @@ struct orc_entry {
-       unsigned        type:2;
- } __packed;
- 
-+/*
-+ * This struct is used by asm and inline asm code to manually annotate the
-+ * location of registers on the stack for the ORC unwinder.
-+ *
-+ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
-+ */
-+struct unwind_hint {
-+      u32             ip;
-+      s16             sp_offset;
-+      u8              sp_reg;
-+      u8              type;
-+};
-+#endif /* __ASSEMBLY__ */
-+
- #endif /* _ORC_TYPES_H */
-diff --git a/tools/objtool/check.c b/tools/objtool/check.c
-index cb57c526ba17..368275de5f23 100644
---- a/tools/objtool/check.c
-+++ b/tools/objtool/check.c
-@@ -100,7 +100,6 @@ static bool gcov_enabled(struct objtool_file *file)
- static bool ignore_func(struct objtool_file *file, struct symbol *func)
- {
-       struct rela *rela;
--      struct instruction *insn;
- 
-       /* check for STACK_FRAME_NON_STANDARD */
-       if (file->whitelist && file->whitelist->rela)
-@@ -113,11 +112,6 @@ static bool ignore_func(struct objtool_file *file, struct symbol *func)
-                               return true;
-               }
- 
--      /* check if it has a context switching instruction */
--      func_for_each_insn(file, func, insn)
--              if (insn->type == INSN_CONTEXT_SWITCH)
--                      return true;
--
-       return false;
- }
- 
-@@ -879,6 +873,99 @@ static int add_switch_table_alts(struct objtool_file *file)
-       return 0;
- }
- 
-+static int read_unwind_hints(struct objtool_file *file)
-+{
-+      struct section *sec, *relasec;
-+      struct rela *rela;
-+      struct unwind_hint *hint;
-+      struct instruction *insn;
-+      struct cfi_reg *cfa;
-+      int i;
-+
-+      sec = find_section_by_name(file->elf, ".discard.unwind_hints");
-+      if (!sec)
-+              return 0;
-+
-+      relasec = sec->rela;
-+      if (!relasec) {
-+              WARN("missing .rela.discard.unwind_hints section");
-+              return -1;
-+      }
-+
-+      if (sec->len % sizeof(struct unwind_hint)) {
-+              WARN("struct unwind_hint size mismatch");
-+              return -1;
-+      }
-+
-+      file->hints = true;
-+
-+      for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) {
-+              hint = (struct unwind_hint *)sec->data->d_buf + i;
-+
-+              rela = find_rela_by_dest(sec, i * sizeof(*hint));
-+              if (!rela) {
-+                      WARN("can't find rela for unwind_hints[%d]", i);
-+                      return -1;
-+              }
-+
-+              insn = find_insn(file, rela->sym->sec, rela->addend);
-+              if (!insn) {
-+                      WARN("can't find insn for unwind_hints[%d]", i);
-+                      return -1;
-+              }
-+
-+              cfa = &insn->state.cfa;
-+
-+              if (hint->type == UNWIND_HINT_TYPE_SAVE) {
-+                      insn->save = true;
-+                      continue;
-+
-+              } else if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
-+                      insn->restore = true;
-+                      insn->hint = true;
-+                      continue;
-+              }
-+
-+              insn->hint = true;
-+
-+              switch (hint->sp_reg) {
-+              case ORC_REG_UNDEFINED:
-+                      cfa->base = CFI_UNDEFINED;
-+                      break;
-+              case ORC_REG_SP:
-+                      cfa->base = CFI_SP;
-+                      break;
-+              case ORC_REG_BP:
-+                      cfa->base = CFI_BP;
-+                      break;
-+              case ORC_REG_SP_INDIRECT:
-+                      cfa->base = CFI_SP_INDIRECT;
-+                      break;
-+              case ORC_REG_R10:
-+                      cfa->base = CFI_R10;
-+                      break;
-+              case ORC_REG_R13:
-+                      cfa->base = CFI_R13;
-+                      break;
-+              case ORC_REG_DI:
-+                      cfa->base = CFI_DI;
-+                      break;
-+              case ORC_REG_DX:
-+                      cfa->base = CFI_DX;
-+                      break;
-+              default:
-+                      WARN_FUNC("unsupported unwind_hint sp base reg %d",
-+                                insn->sec, insn->offset, hint->sp_reg);
-+                      return -1;
-+              }
-+
-+              cfa->offset = hint->sp_offset;
-+              insn->state.type = hint->type;
-+      }
-+
-+      return 0;
-+}
-+
- static int decode_sections(struct objtool_file *file)
- {
-       int ret;
-@@ -909,6 +996,10 @@ static int decode_sections(struct objtool_file *file)
-       if (ret)
-               return ret;
- 
-+      ret = read_unwind_hints(file);
-+      if (ret)
-+              return ret;
-+
-       return 0;
- }
- 
-@@ -1382,7 +1473,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
-                          struct insn_state state)
- {
-       struct alternative *alt;
--      struct instruction *insn;
-+      struct instruction *insn, *next_insn;
-       struct section *sec;
-       struct symbol *func = NULL;
-       int ret;
-@@ -1397,6 +1488,8 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
-       }
- 
-       while (1) {
-+              next_insn = next_insn_same_sec(file, insn);
-+
-               if (file->c_file && insn->func) {
-                       if (func && func != insn->func) {
-                               WARN("%s() falls through to next function %s()",
-@@ -1414,13 +1507,54 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
-               }
- 
-               if (insn->visited) {
--                      if (!insn_state_match(insn, &state))
-+                      if (!insn->hint && !insn_state_match(insn, &state))
-                               return 1;
- 
-                       return 0;
-               }
- 
--              insn->state = state;
-+              if (insn->hint) {
-+                      if (insn->restore) {
-+                              struct instruction *save_insn, *i;
-+
-+                              i = insn;
-+                              save_insn = NULL;
-+                              func_for_each_insn_continue_reverse(file, func, i) {
-+                                      if (i->save) {
-+                                              save_insn = i;
-+                                              break;
-+                                      }
-+                              }
-+
-+                              if (!save_insn) {
-+                                      WARN_FUNC("no corresponding CFI save for CFI restore",
-+                                                sec, insn->offset);
-+                                      return 1;
-+                              }
-+
-+                              if (!save_insn->visited) {
-+                                      /*
-+                                       * Oops, no state to copy yet.
-+                                       * Hopefully we can reach this
-+                                       * instruction from another branch
-+                                       * after the save insn has been
-+                                       * visited.
-+                                       */
-+                                      if (insn == first)
-+                                              return 0;
-+
-+                                      WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
-+                                                sec, insn->offset);
-+                                      return 1;
-+                              }
-+
-+                              insn->state = save_insn->state;
-+                      }
-+
-+                      state = insn->state;
-+
-+              } else
-+                      insn->state = state;
- 
-               insn->visited = true;
- 
-@@ -1497,6 +1631,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
- 
-                       return 0;
- 
-+              case INSN_CONTEXT_SWITCH:
-+                      if (func && (!next_insn || !next_insn->hint)) {
-+                              WARN_FUNC("unsupported instruction in callable function",
-+                                        sec, insn->offset);
-+                              return 1;
-+                      }
-+                      return 0;
-+
-               case INSN_STACK:
-                       if (update_insn_state(insn, &state))
-                               return -1;
-@@ -1510,7 +1652,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
-               if (insn->dead_end)
-                       return 0;
- 
--              insn = next_insn_same_sec(file, insn);
-+              insn = next_insn;
-               if (!insn) {
-                       WARN("%s: unexpected end of section", sec->name);
-                       return 1;
-@@ -1520,6 +1662,27 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
-       return 0;
- }
- 
-+static int validate_unwind_hints(struct objtool_file *file)
-+{
-+      struct instruction *insn;
-+      int ret, warnings = 0;
-+      struct insn_state state;
-+
-+      if (!file->hints)
-+              return 0;
-+
-+      clear_insn_state(&state);
-+
-+      for_each_insn(file, insn) {
-+              if (insn->hint && !insn->visited) {
-+                      ret = validate_branch(file, insn, state);
-+                      warnings += ret;
-+              }
-+      }
-+
-+      return warnings;
-+}
-+
- static bool is_kasan_insn(struct instruction *insn)
- {
-       return (insn->type == INSN_CALL &&
-@@ -1665,8 +1828,9 @@ int check(const char *_objname, bool _nofp, bool orc)
-       hash_init(file.insn_hash);
-       file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
-       file.rodata = find_section_by_name(file.elf, ".rodata");
--      file.ignore_unreachables = false;
-       file.c_file = find_section_by_name(file.elf, ".comment");
-+      file.ignore_unreachables = false;
-+      file.hints = false;
- 
-       arch_initial_func_cfi_state(&initial_func_cfi);
- 
-@@ -1683,6 +1847,11 @@ int check(const char *_objname, bool _nofp, bool orc)
-               goto out;
-       warnings += ret;
- 
-+      ret = validate_unwind_hints(&file);
-+      if (ret < 0)
-+              goto out;
-+      warnings += ret;
-+
-       if (!warnings) {
-               ret = validate_reachable_instructions(&file);
-               if (ret < 0)
--- 
-2.14.2
-
diff --git a/patches/kernel/0037-objtool-x86-Add-facility-for-asm-code-to-provide-unw.patch b/patches/kernel/0037-objtool-x86-Add-facility-for-asm-code-to-provide-unw.patch

new file mode 100644 (file)

index 0000000..3c4000c
--- /dev/null
+++ b/patches/kernel/0037-objtool-x86-Add-facility-for-asm-code-to-provide-unw.patch
@@ -0,0 +1,641 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Tue, 11 Jul 2017 10:33:43 -0500
+Subject: [PATCH] objtool, x86: Add facility for asm code to provide unwind
+ hints
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Some asm (and inline asm) code does special things to the stack which
+objtool can't understand.  (Nor can GCC or GNU assembler, for that
+matter.)  In such cases we need a facility for the code to provide
+annotations, so the unwinder can unwind through it.
+
+This provides such a facility, in the form of unwind hints.  They're
+similar to the GNU assembler .cfi* directives, but they give more
+information, and are needed in far fewer places, because objtool can
+fill in the blanks by following branches and adjusting the stack pointer
+for pushes and pops.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: live-patching@vger.kernel.org
+Link: http://lkml.kernel.org/r/0f5f3c9104fca559ff4088bece1d14ae3bca52d5.1499786555.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 39358a033b2e4432052265c1fa0f36f572d8cfb5)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit a1fed2e10e84d48643a09861c2d127968621813e)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ tools/objtool/Makefile              |   3 +
+ arch/x86/include/asm/orc_types.h    | 107 ++++++++++++++++++++
+ arch/x86/include/asm/unwind_hints.h | 103 +++++++++++++++++++
+ tools/objtool/check.h               |   4 +-
+ tools/objtool/orc_types.h           |  22 +++++
+ tools/objtool/check.c               | 191 +++++++++++++++++++++++++++++++++---
+ 6 files changed, 417 insertions(+), 13 deletions(-)
+ create mode 100644 arch/x86/include/asm/orc_types.h
+ create mode 100644 arch/x86/include/asm/unwind_hints.h
+
+diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
+index 0e2765e243c0..3a6425fefc43 100644
+--- a/tools/objtool/Makefile
++++ b/tools/objtool/Makefile
+@@ -52,6 +52,9 @@ $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
+       diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \
+       diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \
+       || echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true
++      @(test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \
++      diff ../../arch/x86/include/asm/orc_types.h orc_types.h >/dev/null) \
++      || echo "warning: objtool: orc_types.h differs from kernel" >&2 )) || true
+       $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
+ 
+ 
+diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
+new file mode 100644
+index 000000000000..7dc777a6cb40
+--- /dev/null
++++ b/arch/x86/include/asm/orc_types.h
+@@ -0,0 +1,107 @@
++/*
++ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version 2
++ * of the License, or (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, see <http://www.gnu.org/licenses/>.
++ */
++
++#ifndef _ORC_TYPES_H
++#define _ORC_TYPES_H
++
++#include <linux/types.h>
++#include <linux/compiler.h>
++
++/*
++ * The ORC_REG_* registers are base registers which are used to find other
++ * registers on the stack.
++ *
++ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
++ * address of the previous frame: the caller's SP before it called the current
++ * function.
++ *
++ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
++ * the current frame.
++ *
++ * The most commonly used base registers are SP and BP -- which the previous SP
++ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is
++ * usually based on.
++ *
++ * The rest of the base registers are needed for special cases like entry code
++ * and GCC realigned stacks.
++ */
++#define ORC_REG_UNDEFINED             0
++#define ORC_REG_PREV_SP                       1
++#define ORC_REG_DX                    2
++#define ORC_REG_DI                    3
++#define ORC_REG_BP                    4
++#define ORC_REG_SP                    5
++#define ORC_REG_R10                   6
++#define ORC_REG_R13                   7
++#define ORC_REG_BP_INDIRECT           8
++#define ORC_REG_SP_INDIRECT           9
++#define ORC_REG_MAX                   15
++
++/*
++ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
++ * caller's SP right before it made the call).  Used for all callable
++ * functions, i.e. all C code and all callable asm functions.
++ *
++ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
++ * to a fully populated pt_regs from a syscall, interrupt, or exception.
++ *
++ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
++ * points to the iret return frame.
++ *
++ * The UNWIND_HINT macros are used only for the unwind_hint struct.  They
++ * aren't used in struct orc_entry due to size and complexity constraints.
++ * Objtool converts them to real types when it converts the hints to orc
++ * entries.
++ */
++#define ORC_TYPE_CALL                 0
++#define ORC_TYPE_REGS                 1
++#define ORC_TYPE_REGS_IRET            2
++#define UNWIND_HINT_TYPE_SAVE         3
++#define UNWIND_HINT_TYPE_RESTORE      4
++
++#ifndef __ASSEMBLY__
++/*
++ * This struct is more or less a vastly simplified version of the DWARF Call
++ * Frame Information standard.  It contains only the necessary parts of DWARF
++ * CFI, simplified for ease of access by the in-kernel unwinder.  It tells the
++ * unwinder how to find the previous SP and BP (and sometimes entry regs) on
++ * the stack for a given code address.  Each instance of the struct corresponds
++ * to one or more code locations.
++ */
++struct orc_entry {
++      s16             sp_offset;
++      s16             bp_offset;
++      unsigned        sp_reg:4;
++      unsigned        bp_reg:4;
++      unsigned        type:2;
++} __packed;
++
++/*
++ * This struct is used by asm and inline asm code to manually annotate the
++ * location of registers on the stack for the ORC unwinder.
++ *
++ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
++ */
++struct unwind_hint {
++      u32             ip;
++      s16             sp_offset;
++      u8              sp_reg;
++      u8              type;
++};
++#endif /* __ASSEMBLY__ */
++
++#endif /* _ORC_TYPES_H */
+diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
+new file mode 100644
+index 000000000000..5e02b11c9b86
+--- /dev/null
++++ b/arch/x86/include/asm/unwind_hints.h
+@@ -0,0 +1,103 @@
++#ifndef _ASM_X86_UNWIND_HINTS_H
++#define _ASM_X86_UNWIND_HINTS_H
++
++#include "orc_types.h"
++
++#ifdef __ASSEMBLY__
++
++/*
++ * In asm, there are two kinds of code: normal C-type callable functions and
++ * the rest.  The normal callable functions can be called by other code, and
++ * don't do anything unusual with the stack.  Such normal callable functions
++ * are annotated with the ENTRY/ENDPROC macros.  Most asm code falls in this
++ * category.  In this case, no special debugging annotations are needed because
++ * objtool can automatically generate the ORC data for the ORC unwinder to read
++ * at runtime.
++ *
++ * Anything which doesn't fall into the above category, such as syscall and
++ * interrupt handlers, tends to not be called directly by other functions, and
++ * often does unusual non-C-function-type things with the stack pointer.  Such
++ * code needs to be annotated such that objtool can understand it.  The
++ * following CFI hint macros are for this type of code.
++ *
++ * These macros provide hints to objtool about the state of the stack at each
++ * instruction.  Objtool starts from the hints and follows the code flow,
++ * making automatic CFI adjustments when it sees pushes and pops, filling out
++ * the debuginfo as necessary.  It will also warn if it sees any
++ * inconsistencies.
++ */
++.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL
++#ifdef CONFIG_STACK_VALIDATION
++.Lunwind_hint_ip_\@:
++      .pushsection .discard.unwind_hints
++              /* struct unwind_hint */
++              .long .Lunwind_hint_ip_\@ - .
++              .short \sp_offset
++              .byte \sp_reg
++              .byte \type
++      .popsection
++#endif
++.endm
++
++.macro UNWIND_HINT_EMPTY
++      UNWIND_HINT sp_reg=ORC_REG_UNDEFINED
++.endm
++
++.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0
++      .if \base == %rsp && \indirect
++              .set sp_reg, ORC_REG_SP_INDIRECT
++      .elseif \base == %rsp
++              .set sp_reg, ORC_REG_SP
++      .elseif \base == %rbp
++              .set sp_reg, ORC_REG_BP
++      .elseif \base == %rdi
++              .set sp_reg, ORC_REG_DI
++      .elseif \base == %rdx
++              .set sp_reg, ORC_REG_DX
++      .elseif \base == %r10
++              .set sp_reg, ORC_REG_R10
++      .else
++              .error "UNWIND_HINT_REGS: bad base register"
++      .endif
++
++      .set sp_offset, \offset
++
++      .if \iret
++              .set type, ORC_TYPE_REGS_IRET
++      .elseif \extra == 0
++              .set type, ORC_TYPE_REGS_IRET
++              .set sp_offset, \offset + (16*8)
++      .else
++              .set type, ORC_TYPE_REGS
++      .endif
++
++      UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type
++.endm
++
++.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0
++      UNWIND_HINT_REGS base=\base offset=\offset iret=1
++.endm
++
++.macro UNWIND_HINT_FUNC sp_offset=8
++      UNWIND_HINT sp_offset=\sp_offset
++.endm
++
++#else /* !__ASSEMBLY__ */
++
++#define UNWIND_HINT(sp_reg, sp_offset, type)                  \
++      "987: \n\t"                                             \
++      ".pushsection .discard.unwind_hints\n\t"                \
++      /* struct unwind_hint */                                \
++      ".long 987b - .\n\t"                                    \
++      ".short " __stringify(sp_offset) "\n\t"         \
++      ".byte " __stringify(sp_reg) "\n\t"                     \
++      ".byte " __stringify(type) "\n\t"                       \
++      ".popsection\n\t"
++
++#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE)
++
++#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE)
++
++#endif /* __ASSEMBLY__ */
++
++#endif /* _ASM_X86_UNWIND_HINTS_H */
+diff --git a/tools/objtool/check.h b/tools/objtool/check.h
+index 046874bbe226..ac3d4b13f17b 100644
+--- a/tools/objtool/check.h
++++ b/tools/objtool/check.h
+@@ -43,7 +43,7 @@ struct instruction {
+       unsigned int len;
+       unsigned char type;
+       unsigned long immediate;
+-      bool alt_group, visited, dead_end, ignore;
++      bool alt_group, visited, dead_end, ignore, hint, save, restore;
+       struct symbol *call_dest;
+       struct instruction *jump_dest;
+       struct list_head alts;
+@@ -58,7 +58,7 @@ struct objtool_file {
+       struct list_head insn_list;
+       DECLARE_HASHTABLE(insn_hash, 16);
+       struct section *rodata, *whitelist;
+-      bool ignore_unreachables, c_file;
++      bool ignore_unreachables, c_file, hints;
+ };
+ 
+ int check(const char *objname, bool nofp, bool orc);
+diff --git a/tools/objtool/orc_types.h b/tools/objtool/orc_types.h
+index fc5cf6cffd9a..9c9dc579bd7d 100644
+--- a/tools/objtool/orc_types.h
++++ b/tools/objtool/orc_types.h
+@@ -61,11 +61,19 @@
+  *
+  * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
+  * points to the iret return frame.
++ *
++ * The UNWIND_HINT macros are used only for the unwind_hint struct.  They
++ * aren't used in struct orc_entry due to size and complexity constraints.
++ * Objtool converts them to real types when it converts the hints to orc
++ * entries.
+  */
+ #define ORC_TYPE_CALL                 0
+ #define ORC_TYPE_REGS                 1
+ #define ORC_TYPE_REGS_IRET            2
++#define UNWIND_HINT_TYPE_SAVE         3
++#define UNWIND_HINT_TYPE_RESTORE      4
+ 
++#ifndef __ASSEMBLY__
+ /*
+  * This struct is more or less a vastly simplified version of the DWARF Call
+  * Frame Information standard.  It contains only the necessary parts of DWARF
+@@ -82,4 +90,18 @@ struct orc_entry {
+       unsigned        type:2;
+ } __packed;
+ 
++/*
++ * This struct is used by asm and inline asm code to manually annotate the
++ * location of registers on the stack for the ORC unwinder.
++ *
++ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
++ */
++struct unwind_hint {
++      u32             ip;
++      s16             sp_offset;
++      u8              sp_reg;
++      u8              type;
++};
++#endif /* __ASSEMBLY__ */
++
+ #endif /* _ORC_TYPES_H */
+diff --git a/tools/objtool/check.c b/tools/objtool/check.c
+index cb57c526ba17..368275de5f23 100644
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -100,7 +100,6 @@ static bool gcov_enabled(struct objtool_file *file)
+ static bool ignore_func(struct objtool_file *file, struct symbol *func)
+ {
+       struct rela *rela;
+-      struct instruction *insn;
+ 
+       /* check for STACK_FRAME_NON_STANDARD */
+       if (file->whitelist && file->whitelist->rela)
+@@ -113,11 +112,6 @@ static bool ignore_func(struct objtool_file *file, struct symbol *func)
+                               return true;
+               }
+ 
+-      /* check if it has a context switching instruction */
+-      func_for_each_insn(file, func, insn)
+-              if (insn->type == INSN_CONTEXT_SWITCH)
+-                      return true;
+-
+       return false;
+ }
+ 
+@@ -879,6 +873,99 @@ static int add_switch_table_alts(struct objtool_file *file)
+       return 0;
+ }
+ 
++static int read_unwind_hints(struct objtool_file *file)
++{
++      struct section *sec, *relasec;
++      struct rela *rela;
++      struct unwind_hint *hint;
++      struct instruction *insn;
++      struct cfi_reg *cfa;
++      int i;
++
++      sec = find_section_by_name(file->elf, ".discard.unwind_hints");
++      if (!sec)
++              return 0;
++
++      relasec = sec->rela;
++      if (!relasec) {
++              WARN("missing .rela.discard.unwind_hints section");
++              return -1;
++      }
++
++      if (sec->len % sizeof(struct unwind_hint)) {
++              WARN("struct unwind_hint size mismatch");
++              return -1;
++      }
++
++      file->hints = true;
++
++      for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) {
++              hint = (struct unwind_hint *)sec->data->d_buf + i;
++
++              rela = find_rela_by_dest(sec, i * sizeof(*hint));
++              if (!rela) {
++                      WARN("can't find rela for unwind_hints[%d]", i);
++                      return -1;
++              }
++
++              insn = find_insn(file, rela->sym->sec, rela->addend);
++              if (!insn) {
++                      WARN("can't find insn for unwind_hints[%d]", i);
++                      return -1;
++              }
++
++              cfa = &insn->state.cfa;
++
++              if (hint->type == UNWIND_HINT_TYPE_SAVE) {
++                      insn->save = true;
++                      continue;
++
++              } else if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
++                      insn->restore = true;
++                      insn->hint = true;
++                      continue;
++              }
++
++              insn->hint = true;
++
++              switch (hint->sp_reg) {
++              case ORC_REG_UNDEFINED:
++                      cfa->base = CFI_UNDEFINED;
++                      break;
++              case ORC_REG_SP:
++                      cfa->base = CFI_SP;
++                      break;
++              case ORC_REG_BP:
++                      cfa->base = CFI_BP;
++                      break;
++              case ORC_REG_SP_INDIRECT:
++                      cfa->base = CFI_SP_INDIRECT;
++                      break;
++              case ORC_REG_R10:
++                      cfa->base = CFI_R10;
++                      break;
++              case ORC_REG_R13:
++                      cfa->base = CFI_R13;
++                      break;
++              case ORC_REG_DI:
++                      cfa->base = CFI_DI;
++                      break;
++              case ORC_REG_DX:
++                      cfa->base = CFI_DX;
++                      break;
++              default:
++                      WARN_FUNC("unsupported unwind_hint sp base reg %d",
++                                insn->sec, insn->offset, hint->sp_reg);
++                      return -1;
++              }
++
++              cfa->offset = hint->sp_offset;
++              insn->state.type = hint->type;
++      }
++
++      return 0;
++}
++
+ static int decode_sections(struct objtool_file *file)
+ {
+       int ret;
+@@ -909,6 +996,10 @@ static int decode_sections(struct objtool_file *file)
+       if (ret)
+               return ret;
+ 
++      ret = read_unwind_hints(file);
++      if (ret)
++              return ret;
++
+       return 0;
+ }
+ 
+@@ -1382,7 +1473,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
+                          struct insn_state state)
+ {
+       struct alternative *alt;
+-      struct instruction *insn;
++      struct instruction *insn, *next_insn;
+       struct section *sec;
+       struct symbol *func = NULL;
+       int ret;
+@@ -1397,6 +1488,8 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
+       }
+ 
+       while (1) {
++              next_insn = next_insn_same_sec(file, insn);
++
+               if (file->c_file && insn->func) {
+                       if (func && func != insn->func) {
+                               WARN("%s() falls through to next function %s()",
+@@ -1414,13 +1507,54 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
+               }
+ 
+               if (insn->visited) {
+-                      if (!!insn_state_match(insn, &state))
++                      if (!insn->hint && !insn_state_match(insn, &state))
+                               return 1;
+ 
+                       return 0;
+               }
+ 
+-              insn->state = state;
++              if (insn->hint) {
++                      if (insn->restore) {
++                              struct instruction *save_insn, *i;
++
++                              i = insn;
++                              save_insn = NULL;
++                              func_for_each_insn_continue_reverse(file, func, i) {
++                                      if (i->save) {
++                                              save_insn = i;
++                                              break;
++                                      }
++                              }
++
++                              if (!save_insn) {
++                                      WARN_FUNC("no corresponding CFI save for CFI restore",
++                                                sec, insn->offset);
++                                      return 1;
++                              }
++
++                              if (!save_insn->visited) {
++                                      /*
++                                       * Oops, no state to copy yet.
++                                       * Hopefully we can reach this
++                                       * instruction from another branch
++                                       * after the save insn has been
++                                       * visited.
++                                       */
++                                      if (insn == first)
++                                              return 0;
++
++                                      WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
++                                                sec, insn->offset);
++                                      return 1;
++                              }
++
++                              insn->state = save_insn->state;
++                      }
++
++                      state = insn->state;
++
++              } else
++                      insn->state = state;
+ 
+               insn->visited = true;
+ 
+@@ -1497,6 +1631,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
+ 
+                       return 0;
+ 
++              case INSN_CONTEXT_SWITCH:
++                      if (func && (!next_insn || !next_insn->hint)) {
++                              WARN_FUNC("unsupported instruction in callable function",
++                                        sec, insn->offset);
++                              return 1;
++                      }
++                      return 0;
++
+               case INSN_STACK:
+                       if (update_insn_state(insn, &state))
+                               return -1;
+@@ -1510,7 +1652,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
+               if (insn->dead_end)
+                       return 0;
+ 
+-              insn = next_insn_same_sec(file, insn);
++              insn = next_insn;
+               if (!insn) {
+                       WARN("%s: unexpected end of section", sec->name);
+                       return 1;
+@@ -1520,6 +1662,27 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
+       return 0;
+ }
+ 
++static int validate_unwind_hints(struct objtool_file *file)
++{
++      struct instruction *insn;
++      int ret, warnings = 0;
++      struct insn_state state;
++
++      if (!file->hints)
++              return 0;
++
++      clear_insn_state(&state);
++
++      for_each_insn(file, insn) {
++              if (insn->hint && !insn->visited) {
++                      ret = validate_branch(file, insn, state);
++                      warnings += ret;
++              }
++      }
++
++      return warnings;
++}
++
+ static bool is_kasan_insn(struct instruction *insn)
+ {
+       return (insn->type == INSN_CALL &&
+@@ -1665,8 +1828,9 @@ int check(const char *_objname, bool _nofp, bool orc)
+       hash_init(file.insn_hash);
+       file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
+       file.rodata = find_section_by_name(file.elf, ".rodata");
+-      file.ignore_unreachables = false;
+       file.c_file = find_section_by_name(file.elf, ".comment");
++      file.ignore_unreachables = false;
++      file.hints = false;
+ 
+       arch_initial_func_cfi_state(&initial_func_cfi);
+ 
+@@ -1683,6 +1847,11 @@ int check(const char *_objname, bool _nofp, bool orc)
+               goto out;
+       warnings += ret;
+ 
++      ret = validate_unwind_hints(&file);
++      if (ret < 0)
++              goto out;
++      warnings += ret;
++
+       if (!warnings) {
+               ret = validate_reachable_instructions(&file);
+               if (ret < 0)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0037-x86-unwind-Add-the-ORC-unwinder.patch b/patches/kernel/0037-x86-unwind-Add-the-ORC-unwinder.patch

deleted file mode 100644 (file)

index b8f0318..0000000
--- a/patches/kernel/0037-x86-unwind-Add-the-ORC-unwinder.patch
+++ /dev/null
@@ -1,1407 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Mon, 24 Jul 2017 18:36:57 -0500
-Subject: [PATCH] x86/unwind: Add the ORC unwinder
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Add the new ORC unwinder which is enabled by CONFIG_ORC_UNWINDER=y.
-It plugs into the existing x86 unwinder framework.
-
-It relies on objtool to generate the needed .orc_unwind and
-.orc_unwind_ip sections.
-
-For more details on why ORC is used instead of DWARF, see
-Documentation/x86/orc-unwinder.txt - but the short version is
-that it's a simplified, fundamentally more robust debuginfo
-data structure, which also allows up to two orders of magnitude
-faster lookups than the DWARF unwinder - which matters to
-profiling workloads like perf.
-
-Thanks to Andy Lutomirski for the performance improvement ideas:
-splitting the ORC unwind table into two parallel arrays and creating a
-fast lookup table to search a subset of the unwind table.
-
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mike Galbraith <efault@gmx.de>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: live-patching@vger.kernel.org
-Link: http://lkml.kernel.org/r/0a6cbfb40f8da99b7a45a1a8302dc6aef16ec812.1500938583.git.jpoimboe@redhat.com
-[ Extended the changelog. ]
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit ee9f8fce99640811b2b8e79d0d1dbe8bab69ba67)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit dccbf63d7a6cc431af23a86e28275a74904545cd)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/x86/orc-unwinder.txt | 179 ++++++++++++
- arch/x86/kernel/Makefile           |   8 +-
- scripts/Makefile.build             |  14 +-
- arch/um/include/asm/unwind.h       |   8 +
- arch/x86/include/asm/module.h      |   9 +
- arch/x86/include/asm/orc_lookup.h  |  46 +++
- arch/x86/include/asm/orc_types.h   |   2 +-
- arch/x86/include/asm/unwind.h      |  76 +++--
- include/asm-generic/vmlinux.lds.h  |  27 +-
- arch/x86/kernel/module.c           |  11 +-
- arch/x86/kernel/setup.c            |   3 +
- arch/x86/kernel/unwind_frame.c     |  39 +--
- arch/x86/kernel/unwind_guess.c     |   5 +
- arch/x86/kernel/unwind_orc.c       | 582 +++++++++++++++++++++++++++++++++++++
- arch/x86/Kconfig                   |   1 +
- arch/x86/Kconfig.debug             |  25 ++
- arch/x86/kernel/vmlinux.lds.S      |   3 +
- lib/Kconfig.debug                  |   3 +
- 18 files changed, 977 insertions(+), 64 deletions(-)
- create mode 100644 Documentation/x86/orc-unwinder.txt
- create mode 100644 arch/um/include/asm/unwind.h
- create mode 100644 arch/x86/include/asm/orc_lookup.h
- create mode 100644 arch/x86/kernel/unwind_orc.c
-
-diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt
-new file mode 100644
-index 000000000000..af0c9a4c65a6
---- /dev/null
-+++ b/Documentation/x86/orc-unwinder.txt
-@@ -0,0 +1,179 @@
-+ORC unwinder
-+============
-+
-+Overview
-+--------
-+
-+The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is
-+similar in concept to a DWARF unwinder.  The difference is that the
-+format of the ORC data is much simpler than DWARF, which in turn allows
-+the ORC unwinder to be much simpler and faster.
-+
-+The ORC data consists of unwind tables which are generated by objtool.
-+They contain out-of-band data which is used by the in-kernel ORC
-+unwinder.  Objtool generates the ORC data by first doing compile-time
-+stack metadata validation (CONFIG_STACK_VALIDATION).  After analyzing
-+all the code paths of a .o file, it determines information about the
-+stack state at each instruction address in the file and outputs that
-+information to the .orc_unwind and .orc_unwind_ip sections.
-+
-+The per-object ORC sections are combined at link time and are sorted and
-+post-processed at boot time.  The unwinder uses the resulting data to
-+correlate instruction addresses with their stack states at run time.
-+
-+
-+ORC vs frame pointers
-+---------------------
-+
-+With frame pointers enabled, GCC adds instrumentation code to every
-+function in the kernel.  The kernel's .text size increases by about
-+3.2%, resulting in a broad kernel-wide slowdown.  Measurements by Mel
-+Gorman [1] have shown a slowdown of 5-10% for some workloads.
-+
-+In contrast, the ORC unwinder has no effect on text size or runtime
-+performance, because the debuginfo is out of band.  So if you disable
-+frame pointers and enable the ORC unwinder, you get a nice performance
-+improvement across the board, and still have reliable stack traces.
-+
-+Ingo Molnar says:
-+
-+  "Note that it's not just a performance improvement, but also an
-+  instruction cache locality improvement: 3.2% .text savings almost
-+  directly transform into a similarly sized reduction in cache
-+  footprint. That can transform to even higher speedups for workloads
-+  whose cache locality is borderline."
-+
-+Another benefit of ORC compared to frame pointers is that it can
-+reliably unwind across interrupts and exceptions.  Frame pointer based
-+unwinds can sometimes skip the caller of the interrupted function, if it
-+was a leaf function or if the interrupt hit before the frame pointer was
-+saved.
-+
-+The main disadvantage of the ORC unwinder compared to frame pointers is
-+that it needs more memory to store the ORC unwind tables: roughly 2-4MB
-+depending on the kernel config.
-+
-+
-+ORC vs DWARF
-+------------
-+
-+ORC debuginfo's advantage over DWARF itself is that it's much simpler.
-+It gets rid of the complex DWARF CFI state machine and also gets rid of
-+the tracking of unnecessary registers.  This allows the unwinder to be
-+much simpler, meaning fewer bugs, which is especially important for
-+mission critical oops code.
-+
-+The simpler debuginfo format also enables the unwinder to be much faster
-+than DWARF, which is important for perf and lockdep.  In a basic
-+performance test by Jiri Slaby [2], the ORC unwinder was about 20x
-+faster than an out-of-tree DWARF unwinder.  (Note: That measurement was
-+taken before some performance tweaks were added, which doubled
-+performance, so the speedup over DWARF may be closer to 40x.)
-+
-+The ORC data format does have a few downsides compared to DWARF.  ORC
-+unwind tables take up ~50% more RAM (+1.3MB on an x86 defconfig kernel)
-+than DWARF-based eh_frame tables.
-+
-+Another potential downside is that, as GCC evolves, it's conceivable
-+that the ORC data may end up being *too* simple to describe the state of
-+the stack for certain optimizations.  But IMO this is unlikely because
-+GCC saves the frame pointer for any unusual stack adjustments it does,
-+so I suspect we'll really only ever need to keep track of the stack
-+pointer and the frame pointer between call frames.  But even if we do
-+end up having to track all the registers DWARF tracks, at least we will
-+still be able to control the format, e.g. no complex state machines.
-+
-+
-+ORC unwind table generation
-+---------------------------
-+
-+The ORC data is generated by objtool.  With the existing compile-time
-+stack metadata validation feature, objtool already follows all code
-+paths, and so it already has all the information it needs to be able to
-+generate ORC data from scratch.  So it's an easy step to go from stack
-+validation to ORC data generation.
-+
-+It should be possible to instead generate the ORC data with a simple
-+tool which converts DWARF to ORC data.  However, such a solution would
-+be incomplete due to the kernel's extensive use of asm, inline asm, and
-+special sections like exception tables.
-+
-+That could be rectified by manually annotating those special code paths
-+using GNU assembler .cfi annotations in .S files, and homegrown
-+annotations for inline asm in .c files.  But asm annotations were tried
-+in the past and were found to be unmaintainable.  They were often
-+incorrect/incomplete and made the code harder to read and keep updated.
-+And based on looking at glibc code, annotating inline asm in .c files
-+might be even worse.
-+
-+Objtool still needs a few annotations, but only in code which does
-+unusual things to the stack like entry code.  And even then, far fewer
-+annotations are needed than what DWARF would need, so they're much more
-+maintainable than DWARF CFI annotations.
-+
-+So the advantages of using objtool to generate ORC data are that it
-+gives more accurate debuginfo, with very few annotations.  It also
-+insulates the kernel from toolchain bugs which can be very painful to
-+deal with in the kernel since we often have to work around issues in
-+older versions of the toolchain for years.
-+
-+The downside is that the unwinder now becomes dependent on objtool's
-+ability to reverse engineer GCC code flow.  If GCC optimizations become
-+too complicated for objtool to follow, the ORC data generation might
-+stop working or become incomplete.  (It's worth noting that livepatch
-+already has such a dependency on objtool's ability to follow GCC code
-+flow.)
-+
-+If newer versions of GCC come up with some optimizations which break
-+objtool, we may need to revisit the current implementation.  Some
-+possible solutions would be asking GCC to make the optimizations more
-+palatable, or having objtool use DWARF as an additional input, or
-+creating a GCC plugin to assist objtool with its analysis.  But for now,
-+objtool follows GCC code quite well.
-+
-+
-+Unwinder implementation details
-+-------------------------------
-+
-+Objtool generates the ORC data by integrating with the compile-time
-+stack metadata validation feature, which is described in detail in
-+tools/objtool/Documentation/stack-validation.txt.  After analyzing all
-+the code paths of a .o file, it creates an array of orc_entry structs,
-+and a parallel array of instruction addresses associated with those
-+structs, and writes them to the .orc_unwind and .orc_unwind_ip sections
-+respectively.
-+
-+The ORC data is split into the two arrays for performance reasons, to
-+make the searchable part of the data (.orc_unwind_ip) more compact.  The
-+arrays are sorted in parallel at boot time.
-+
-+Performance is further improved by the use of a fast lookup table which
-+is created at runtime.  The fast lookup table associates a given address
-+with a range of indices for the .orc_unwind table, so that only a small
-+subset of the table needs to be searched.
-+
-+
-+Etymology
-+---------
-+
-+Orcs, fearsome creatures of medieval folklore, are the Dwarves' natural
-+enemies.  Similarly, the ORC unwinder was created in opposition to the
-+complexity and slowness of DWARF.
-+
-+"Although Orcs rarely consider multiple solutions to a problem, they do
-+excel at getting things done because they are creatures of action, not
-+thought." [3]  Similarly, unlike the esoteric DWARF unwinder, the
-+veracious ORC unwinder wastes no time or silicon effort decoding
-+variable-length zero-extended unsigned-integer byte-coded
-+state-machine-based debug information entries.
-+
-+Similar to how Orcs frequently unravel the well-intentioned plans of
-+their adversaries, the ORC unwinder frequently unravels stacks with
-+brutal, unyielding efficiency.
-+
-+ORC stands for Oops Rewind Capability.
-+
-+
-+[1] https://lkml.kernel.org/r/20170602104048.jkkzssljsompjdwy@suse.de
-+[2] https://lkml.kernel.org/r/d2ca5435-6386-29b8-db87-7f227c2b713a@suse.cz
-+[3] http://dustin.wikidot.com/half-orcs-and-orcs
-diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
-index a01892bdd61a..287eac7d207f 100644
---- a/arch/x86/kernel/Makefile
-+++ b/arch/x86/kernel/Makefile
-@@ -126,11 +126,9 @@ obj-$(CONFIG_PERF_EVENTS)         += perf_regs.o
- obj-$(CONFIG_TRACING)                 += tracepoint.o
- obj-$(CONFIG_SCHED_MC_PRIO)           += itmt.o
- 
--ifdef CONFIG_FRAME_POINTER
--obj-y                                 += unwind_frame.o
--else
--obj-y                                 += unwind_guess.o
--endif
-+obj-$(CONFIG_ORC_UNWINDER)            += unwind_orc.o
-+obj-$(CONFIG_FRAME_POINTER_UNWINDER)  += unwind_frame.o
-+obj-$(CONFIG_GUESS_UNWINDER)          += unwind_guess.o
- 
- ###
- # 64 bit specific files
-diff --git a/scripts/Makefile.build b/scripts/Makefile.build
-index 273bc2228307..ab2c8ef43cdb 100644
---- a/scripts/Makefile.build
-+++ b/scripts/Makefile.build
-@@ -258,7 +258,8 @@ ifneq ($(SKIP_STACK_VALIDATION),1)
- 
- __objtool_obj := $(objtree)/tools/objtool/objtool
- 
--objtool_args = check
-+objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check)
-+
- ifndef CONFIG_FRAME_POINTER
- objtool_args += --no-fp
- endif
-@@ -276,6 +277,11 @@ objtool_obj = $(if $(patsubst y%,, \
- endif # SKIP_STACK_VALIDATION
- endif # CONFIG_STACK_VALIDATION
- 
-+# Rebuild all objects when objtool changes, or is enabled/disabled.
-+objtool_dep = $(objtool_obj)                                  \
-+            $(wildcard include/config/orc/unwinder.h          \
-+                       include/config/stack/validation.h)
-+
- define rule_cc_o_c
-       $(call echo-cmd,checksrc) $(cmd_checksrc)                         \
-       $(call cmd_and_fixdep,cc_o_c)                                     \
-@@ -298,14 +304,14 @@ cmd_undef_syms = echo
- endif
- 
- # Built-in and composite module parts
--$(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_obj) FORCE
-+$(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE
-       $(call cmd,force_checksrc)
-       $(call cmd,force_check_kmsg)
-       $(call if_changed_rule,cc_o_c)
- 
- # Single-part modules are special since we need to mark them in $(MODVERDIR)
- 
--$(single-used-m): $(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_obj) FORCE
-+$(single-used-m): $(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE
-       $(call cmd,force_checksrc)
-       $(call cmd,force_check_kmsg)
-       $(call if_changed_rule,cc_o_c)
-@@ -401,7 +407,7 @@ cmd_modversions_S =                                                                \
- endif
- endif
- 
--$(obj)/%.o: $(src)/%.S $(objtool_obj) FORCE
-+$(obj)/%.o: $(src)/%.S $(objtool_dep) FORCE
-       $(call if_changed_rule,as_o_S)
- 
- targets += $(real-objs-y) $(real-objs-m) $(lib-y)
-diff --git a/arch/um/include/asm/unwind.h b/arch/um/include/asm/unwind.h
-new file mode 100644
-index 000000000000..7ffa5437b761
---- /dev/null
-+++ b/arch/um/include/asm/unwind.h
-@@ -0,0 +1,8 @@
-+#ifndef _ASM_UML_UNWIND_H
-+#define _ASM_UML_UNWIND_H
-+
-+static inline void
-+unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
-+                 void *orc, size_t orc_size) {}
-+
-+#endif /* _ASM_UML_UNWIND_H */
-diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
-index e3b7819caeef..9eb7c718aaf8 100644
---- a/arch/x86/include/asm/module.h
-+++ b/arch/x86/include/asm/module.h
-@@ -2,6 +2,15 @@
- #define _ASM_X86_MODULE_H
- 
- #include <asm-generic/module.h>
-+#include <asm/orc_types.h>
-+
-+struct mod_arch_specific {
-+#ifdef CONFIG_ORC_UNWINDER
-+      unsigned int num_orcs;
-+      int *orc_unwind_ip;
-+      struct orc_entry *orc_unwind;
-+#endif
-+};
- 
- #ifdef CONFIG_X86_64
- /* X86_64 does not define MODULE_PROC_FAMILY */
-diff --git a/arch/x86/include/asm/orc_lookup.h b/arch/x86/include/asm/orc_lookup.h
-new file mode 100644
-index 000000000000..91c8d868424d
---- /dev/null
-+++ b/arch/x86/include/asm/orc_lookup.h
-@@ -0,0 +1,46 @@
-+/*
-+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License
-+ * as published by the Free Software Foundation; either version 2
-+ * of the License, or (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
-+ */
-+#ifndef _ORC_LOOKUP_H
-+#define _ORC_LOOKUP_H
-+
-+/*
-+ * This is a lookup table for speeding up access to the .orc_unwind table.
-+ * Given an input address offset, the corresponding lookup table entry
-+ * specifies a subset of the .orc_unwind table to search.
-+ *
-+ * Each block represents the end of the previous range and the start of the
-+ * next range.  An extra block is added to give the last range an end.
-+ *
-+ * The block size should be a power of 2 to avoid a costly 'div' instruction.
-+ *
-+ * A block size of 256 was chosen because it roughly doubles unwinder
-+ * performance while only adding ~5% to the ORC data footprint.
-+ */
-+#define LOOKUP_BLOCK_ORDER    8
-+#define LOOKUP_BLOCK_SIZE     (1 << LOOKUP_BLOCK_ORDER)
-+
-+#ifndef LINKER_SCRIPT
-+
-+extern unsigned int orc_lookup[];
-+extern unsigned int orc_lookup_end[];
-+
-+#define LOOKUP_START_IP               (unsigned long)_stext
-+#define LOOKUP_STOP_IP                (unsigned long)_etext
-+
-+#endif /* LINKER_SCRIPT */
-+
-+#endif /* _ORC_LOOKUP_H */
-diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
-index 7dc777a6cb40..9c9dc579bd7d 100644
---- a/arch/x86/include/asm/orc_types.h
-+++ b/arch/x86/include/asm/orc_types.h
-@@ -88,7 +88,7 @@ struct orc_entry {
-       unsigned        sp_reg:4;
-       unsigned        bp_reg:4;
-       unsigned        type:2;
--};
-+} __packed;
- 
- /*
-  * This struct is used by asm and inline asm code to manually annotate the
-diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
-index e6676495b125..25b8d31a007d 100644
---- a/arch/x86/include/asm/unwind.h
-+++ b/arch/x86/include/asm/unwind.h
-@@ -12,11 +12,14 @@ struct unwind_state {
-       struct task_struct *task;
-       int graph_idx;
-       bool error;
--#ifdef CONFIG_FRAME_POINTER
-+#if defined(CONFIG_ORC_UNWINDER)
-+      bool signal, full_regs;
-+      unsigned long sp, bp, ip;
-+      struct pt_regs *regs;
-+#elif defined(CONFIG_FRAME_POINTER)
-       bool got_irq;
--      unsigned long *bp, *orig_sp;
-+      unsigned long *bp, *orig_sp, ip;
-       struct pt_regs *regs;
--      unsigned long ip;
- #else
-       unsigned long *sp;
- #endif
-@@ -24,41 +27,30 @@ struct unwind_state {
- 
- void __unwind_start(struct unwind_state *state, struct task_struct *task,
-                   struct pt_regs *regs, unsigned long *first_frame);
--
- bool unwind_next_frame(struct unwind_state *state);
--
- unsigned long unwind_get_return_address(struct unwind_state *state);
-+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state);
- 
- static inline bool unwind_done(struct unwind_state *state)
- {
-       return state->stack_info.type == STACK_TYPE_UNKNOWN;
- }
- 
--static inline
--void unwind_start(struct unwind_state *state, struct task_struct *task,
--                struct pt_regs *regs, unsigned long *first_frame)
--{
--      first_frame = first_frame ? : get_stack_pointer(task, regs);
--
--      __unwind_start(state, task, regs, first_frame);
--}
--
- static inline bool unwind_error(struct unwind_state *state)
- {
-       return state->error;
- }
- 
--#ifdef CONFIG_FRAME_POINTER
--
- static inline
--unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
-+void unwind_start(struct unwind_state *state, struct task_struct *task,
-+                struct pt_regs *regs, unsigned long *first_frame)
- {
--      if (unwind_done(state))
--              return NULL;
-+      first_frame = first_frame ? : get_stack_pointer(task, regs);
- 
--      return state->regs ? &state->regs->ip : state->bp + 1;
-+      __unwind_start(state, task, regs, first_frame);
- }
- 
-+#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER)
- static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
- {
-       if (unwind_done(state))
-@@ -66,20 +58,46 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
- 
-       return state->regs;
- }
--
--#else /* !CONFIG_FRAME_POINTER */
--
--static inline
--unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
-+#else
-+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
- {
-       return NULL;
- }
-+#endif
- 
--static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
-+#ifdef CONFIG_ORC_UNWINDER
-+void unwind_init(void);
-+void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
-+                      void *orc, size_t orc_size);
-+#else
-+static inline void unwind_init(void) {}
-+static inline
-+void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
-+                      void *orc, size_t orc_size) {}
-+#endif
-+
-+/*
-+ * This disables KASAN checking when reading a value from another task's stack,
-+ * since the other task could be running on another CPU and could have poisoned
-+ * the stack in the meantime.
-+ */
-+#define READ_ONCE_TASK_STACK(task, x)                 \
-+({                                                    \
-+      unsigned long val;                              \
-+      if (task == current)                            \
-+              val = READ_ONCE(x);                     \
-+      else                                            \
-+              val = READ_ONCE_NOCHECK(x);             \
-+      val;                                            \
-+})
-+
-+static inline bool task_on_another_cpu(struct task_struct *task)
- {
--      return NULL;
-+#ifdef CONFIG_SMP
-+      return task != current && task->on_cpu;
-+#else
-+      return false;
-+#endif
- }
- 
--#endif /* CONFIG_FRAME_POINTER */
--
- #endif /* _ASM_X86_UNWIND_H */
-diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
-index e7e955d4ab9e..9fdb54a95976 100644
---- a/include/asm-generic/vmlinux.lds.h
-+++ b/include/asm-generic/vmlinux.lds.h
-@@ -686,6 +686,31 @@
- #define BUG_TABLE
- #endif
- 
-+#ifdef CONFIG_ORC_UNWINDER
-+#define ORC_UNWIND_TABLE                                              \
-+      . = ALIGN(4);                                                   \
-+      .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) {       \
-+              VMLINUX_SYMBOL(__start_orc_unwind_ip) = .;              \
-+              KEEP(*(.orc_unwind_ip))                                 \
-+              VMLINUX_SYMBOL(__stop_orc_unwind_ip) = .;               \
-+      }                                                               \
-+      . = ALIGN(6);                                                   \
-+      .orc_unwind : AT(ADDR(.orc_unwind) - LOAD_OFFSET) {             \
-+              VMLINUX_SYMBOL(__start_orc_unwind) = .;                 \
-+              KEEP(*(.orc_unwind))                                    \
-+              VMLINUX_SYMBOL(__stop_orc_unwind) = .;                  \
-+      }                                                               \
-+      . = ALIGN(4);                                                   \
-+      .orc_lookup : AT(ADDR(.orc_lookup) - LOAD_OFFSET) {             \
-+              VMLINUX_SYMBOL(orc_lookup) = .;                         \
-+              . += (((SIZEOF(.text) + LOOKUP_BLOCK_SIZE - 1) /        \
-+                      LOOKUP_BLOCK_SIZE) + 1) * 4;                    \
-+              VMLINUX_SYMBOL(orc_lookup_end) = .;                     \
-+      }
-+#else
-+#define ORC_UNWIND_TABLE
-+#endif
-+
- #ifdef CONFIG_PM_TRACE
- #define TRACEDATA                                                     \
-       . = ALIGN(4);                                                   \
-@@ -872,7 +897,7 @@
-               DATA_DATA                                               \
-               CONSTRUCTORS                                            \
-       }                                                               \
--      BUG_TABLE
-+      BUG_TABLE                                                       \
- 
- #define INIT_TEXT_SECTION(inittext_align)                             \
-       . = ALIGN(inittext_align);                                      \
-diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
-index f67bd3205df7..62e7d70aadd5 100644
---- a/arch/x86/kernel/module.c
-+++ b/arch/x86/kernel/module.c
-@@ -35,6 +35,7 @@
- #include <asm/page.h>
- #include <asm/pgtable.h>
- #include <asm/setup.h>
-+#include <asm/unwind.h>
- 
- #if 0
- #define DEBUGP(fmt, ...)                              \
-@@ -213,7 +214,7 @@ int module_finalize(const Elf_Ehdr *hdr,
-                   struct module *me)
- {
-       const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
--              *para = NULL;
-+              *para = NULL, *orc = NULL, *orc_ip = NULL;
-       char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
- 
-       for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
-@@ -225,6 +226,10 @@ int module_finalize(const Elf_Ehdr *hdr,
-                       locks = s;
-               if (!strcmp(".parainstructions", secstrings + s->sh_name))
-                       para = s;
-+              if (!strcmp(".orc_unwind", secstrings + s->sh_name))
-+                      orc = s;
-+              if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
-+                      orc_ip = s;
-       }
- 
-       if (alt) {
-@@ -248,6 +253,10 @@ int module_finalize(const Elf_Ehdr *hdr,
-       /* make jump label nops */
-       jump_label_apply_nops(me);
- 
-+      if (orc && orc_ip)
-+              unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size,
-+                                 (void *)orc->sh_addr, orc->sh_size);
-+
-       return 0;
- }
- 
-diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
-index f964bfddfefd..dd6e8707e969 100644
---- a/arch/x86/kernel/setup.c
-+++ b/arch/x86/kernel/setup.c
-@@ -121,6 +121,7 @@
- #include <asm/microcode.h>
- #include <asm/mmu_context.h>
- #include <asm/kaslr.h>
-+#include <asm/unwind.h>
- 
- /*
-  * max_low_pfn_mapped: highest direct mapped pfn under 4GB
-@@ -1325,6 +1326,8 @@ void __init setup_arch(char **cmdline_p)
-       if (efi_enabled(EFI_BOOT))
-               efi_apply_memmap_quirks();
- #endif
-+
-+      unwind_init();
- }
- 
- #ifdef CONFIG_X86_32
-diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
-index c29e5bc7e9c9..d145a0b1f529 100644
---- a/arch/x86/kernel/unwind_frame.c
-+++ b/arch/x86/kernel/unwind_frame.c
-@@ -10,20 +10,22 @@
- 
- #define FRAME_HEADER_SIZE (sizeof(long) * 2)
- 
--/*
-- * This disables KASAN checking when reading a value from another task's stack,
-- * since the other task could be running on another CPU and could have poisoned
-- * the stack in the meantime.
-- */
--#define READ_ONCE_TASK_STACK(task, x)                 \
--({                                                    \
--      unsigned long val;                              \
--      if (task == current)                            \
--              val = READ_ONCE(x);                     \
--      else                                            \
--              val = READ_ONCE_NOCHECK(x);             \
--      val;                                            \
--})
-+unsigned long unwind_get_return_address(struct unwind_state *state)
-+{
-+      if (unwind_done(state))
-+              return 0;
-+
-+      return __kernel_text_address(state->ip) ? state->ip : 0;
-+}
-+EXPORT_SYMBOL_GPL(unwind_get_return_address);
-+
-+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
-+{
-+      if (unwind_done(state))
-+              return NULL;
-+
-+      return state->regs ? &state->regs->ip : state->bp + 1;
-+}
- 
- static void unwind_dump(struct unwind_state *state)
- {
-@@ -66,15 +68,6 @@ static void unwind_dump(struct unwind_state *state)
-       }
- }
- 
--unsigned long unwind_get_return_address(struct unwind_state *state)
--{
--      if (unwind_done(state))
--              return 0;
--
--      return __kernel_text_address(state->ip) ? state->ip : 0;
--}
--EXPORT_SYMBOL_GPL(unwind_get_return_address);
--
- static size_t regs_size(struct pt_regs *regs)
- {
-       /* x86_32 regs from kernel mode are two words shorter: */
-diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
-index 039f36738e49..4f0e17b90463 100644
---- a/arch/x86/kernel/unwind_guess.c
-+++ b/arch/x86/kernel/unwind_guess.c
-@@ -19,6 +19,11 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
- }
- EXPORT_SYMBOL_GPL(unwind_get_return_address);
- 
-+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
-+{
-+      return NULL;
-+}
-+
- bool unwind_next_frame(struct unwind_state *state)
- {
-       struct stack_info *info = &state->stack_info;
-diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
-new file mode 100644
-index 000000000000..570b70d3f604
---- /dev/null
-+++ b/arch/x86/kernel/unwind_orc.c
-@@ -0,0 +1,582 @@
-+#include <linux/module.h>
-+#include <linux/sort.h>
-+#include <asm/ptrace.h>
-+#include <asm/stacktrace.h>
-+#include <asm/unwind.h>
-+#include <asm/orc_types.h>
-+#include <asm/orc_lookup.h>
-+#include <asm/sections.h>
-+
-+#define orc_warn(fmt, ...) \
-+      printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
-+
-+extern int __start_orc_unwind_ip[];
-+extern int __stop_orc_unwind_ip[];
-+extern struct orc_entry __start_orc_unwind[];
-+extern struct orc_entry __stop_orc_unwind[];
-+
-+static DEFINE_MUTEX(sort_mutex);
-+int *cur_orc_ip_table = __start_orc_unwind_ip;
-+struct orc_entry *cur_orc_table = __start_orc_unwind;
-+
-+unsigned int lookup_num_blocks;
-+bool orc_init;
-+
-+static inline unsigned long orc_ip(const int *ip)
-+{
-+      return (unsigned long)ip + *ip;
-+}
-+
-+static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table,
-+                                  unsigned int num_entries, unsigned long ip)
-+{
-+      int *first = ip_table;
-+      int *last = ip_table + num_entries - 1;
-+      int *mid = first, *found = first;
-+
-+      if (!num_entries)
-+              return NULL;
-+
-+      /*
-+       * Do a binary range search to find the rightmost duplicate of a given
-+       * starting address.  Some entries are section terminators which are
-+       * "weak" entries for ensuring there are no gaps.  They should be
-+       * ignored when they conflict with a real entry.
-+       */
-+      while (first <= last) {
-+              mid = first + ((last - first) / 2);
-+
-+              if (orc_ip(mid) <= ip) {
-+                      found = mid;
-+                      first = mid + 1;
-+              } else
-+                      last = mid - 1;
-+      }
-+
-+      return u_table + (found - ip_table);
-+}
-+
-+#ifdef CONFIG_MODULES
-+static struct orc_entry *orc_module_find(unsigned long ip)
-+{
-+      struct module *mod;
-+
-+      mod = __module_address(ip);
-+      if (!mod || !mod->arch.orc_unwind || !mod->arch.orc_unwind_ip)
-+              return NULL;
-+      return __orc_find(mod->arch.orc_unwind_ip, mod->arch.orc_unwind,
-+                        mod->arch.num_orcs, ip);
-+}
-+#else
-+static struct orc_entry *orc_module_find(unsigned long ip)
-+{
-+      return NULL;
-+}
-+#endif
-+
-+static struct orc_entry *orc_find(unsigned long ip)
-+{
-+      if (!orc_init)
-+              return NULL;
-+
-+      /* For non-init vmlinux addresses, use the fast lookup table: */
-+      if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) {
-+              unsigned int idx, start, stop;
-+
-+              idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
-+
-+              if (unlikely((idx >= lookup_num_blocks-1))) {
-+                      orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%lx\n",
-+                               idx, lookup_num_blocks, ip);
-+                      return NULL;
-+              }
-+
-+              start = orc_lookup[idx];
-+              stop = orc_lookup[idx + 1] + 1;
-+
-+              if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
-+                           (__start_orc_unwind + stop > __stop_orc_unwind))) {
-+                      orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%lx\n",
-+                               idx, lookup_num_blocks, start, stop, ip);
-+                      return NULL;
-+              }
-+
-+              return __orc_find(__start_orc_unwind_ip + start,
-+                                __start_orc_unwind + start, stop - start, ip);
-+      }
-+
-+      /* vmlinux .init slow lookup: */
-+      if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext)
-+              return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
-+                                __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
-+
-+      /* Module lookup: */
-+      return orc_module_find(ip);
-+}
-+
-+static void orc_sort_swap(void *_a, void *_b, int size)
-+{
-+      struct orc_entry *orc_a, *orc_b;
-+      struct orc_entry orc_tmp;
-+      int *a = _a, *b = _b, tmp;
-+      int delta = _b - _a;
-+
-+      /* Swap the .orc_unwind_ip entries: */
-+      tmp = *a;
-+      *a = *b + delta;
-+      *b = tmp - delta;
-+
-+      /* Swap the corresponding .orc_unwind entries: */
-+      orc_a = cur_orc_table + (a - cur_orc_ip_table);
-+      orc_b = cur_orc_table + (b - cur_orc_ip_table);
-+      orc_tmp = *orc_a;
-+      *orc_a = *orc_b;
-+      *orc_b = orc_tmp;
-+}
-+
-+static int orc_sort_cmp(const void *_a, const void *_b)
-+{
-+      struct orc_entry *orc_a;
-+      const int *a = _a, *b = _b;
-+      unsigned long a_val = orc_ip(a);
-+      unsigned long b_val = orc_ip(b);
-+
-+      if (a_val > b_val)
-+              return 1;
-+      if (a_val < b_val)
-+              return -1;
-+
-+      /*
-+       * The "weak" section terminator entries need to always be on the left
-+       * to ensure the lookup code skips them in favor of real entries.
-+       * These terminator entries exist to handle any gaps created by
-+       * whitelisted .o files which didn't get objtool generation.
-+       */
-+      orc_a = cur_orc_table + (a - cur_orc_ip_table);
-+      return orc_a->sp_reg == ORC_REG_UNDEFINED ? -1 : 1;
-+}
-+
-+#ifdef CONFIG_MODULES
-+void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size,
-+                      void *_orc, size_t orc_size)
-+{
-+      int *orc_ip = _orc_ip;
-+      struct orc_entry *orc = _orc;
-+      unsigned int num_entries = orc_ip_size / sizeof(int);
-+
-+      WARN_ON_ONCE(orc_ip_size % sizeof(int) != 0 ||
-+                   orc_size % sizeof(*orc) != 0 ||
-+                   num_entries != orc_size / sizeof(*orc));
-+
-+      /*
-+       * The 'cur_orc_*' globals allow the orc_sort_swap() callback to
-+       * associate an .orc_unwind_ip table entry with its corresponding
-+       * .orc_unwind entry so they can both be swapped.
-+       */
-+      mutex_lock(&sort_mutex);
-+      cur_orc_ip_table = orc_ip;
-+      cur_orc_table = orc;
-+      sort(orc_ip, num_entries, sizeof(int), orc_sort_cmp, orc_sort_swap);
-+      mutex_unlock(&sort_mutex);
-+
-+      mod->arch.orc_unwind_ip = orc_ip;
-+      mod->arch.orc_unwind = orc;
-+      mod->arch.num_orcs = num_entries;
-+}
-+#endif
-+
-+void __init unwind_init(void)
-+{
-+      size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip;
-+      size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind;
-+      size_t num_entries = orc_ip_size / sizeof(int);
-+      struct orc_entry *orc;
-+      int i;
-+
-+      if (!num_entries || orc_ip_size % sizeof(int) != 0 ||
-+          orc_size % sizeof(struct orc_entry) != 0 ||
-+          num_entries != orc_size / sizeof(struct orc_entry)) {
-+              orc_warn("WARNING: Bad or missing .orc_unwind table.  Disabling unwinder.\n");
-+              return;
-+      }
-+
-+      /* Sort the .orc_unwind and .orc_unwind_ip tables: */
-+      sort(__start_orc_unwind_ip, num_entries, sizeof(int), orc_sort_cmp,
-+           orc_sort_swap);
-+
-+      /* Initialize the fast lookup table: */
-+      lookup_num_blocks = orc_lookup_end - orc_lookup;
-+      for (i = 0; i < lookup_num_blocks-1; i++) {
-+              orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
-+                               num_entries,
-+                               LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i));
-+              if (!orc) {
-+                      orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
-+                      return;
-+              }
-+
-+              orc_lookup[i] = orc - __start_orc_unwind;
-+      }
-+
-+      /* Initialize the ending block: */
-+      orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries,
-+                       LOOKUP_STOP_IP);
-+      if (!orc) {
-+              orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
-+              return;
-+      }
-+      orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind;
-+
-+      orc_init = true;
-+}
-+
-+unsigned long unwind_get_return_address(struct unwind_state *state)
-+{
-+      if (unwind_done(state))
-+              return 0;
-+
-+      return __kernel_text_address(state->ip) ? state->ip : 0;
-+}
-+EXPORT_SYMBOL_GPL(unwind_get_return_address);
-+
-+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
-+{
-+      if (unwind_done(state))
-+              return NULL;
-+
-+      if (state->regs)
-+              return &state->regs->ip;
-+
-+      if (state->sp)
-+              return (unsigned long *)state->sp - 1;
-+
-+      return NULL;
-+}
-+
-+static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
-+                          size_t len)
-+{
-+      struct stack_info *info = &state->stack_info;
-+
-+      /*
-+       * If the address isn't on the current stack, switch to the next one.
-+       *
-+       * We may have to traverse multiple stacks to deal with the possibility
-+       * that info->next_sp could point to an empty stack and the address
-+       * could be on a subsequent stack.
-+       */
-+      while (!on_stack(info, (void *)addr, len))
-+              if (get_stack_info(info->next_sp, state->task, info,
-+                                 &state->stack_mask))
-+                      return false;
-+
-+      return true;
-+}
-+
-+static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
-+                          unsigned long *val)
-+{
-+      if (!stack_access_ok(state, addr, sizeof(long)))
-+              return false;
-+
-+      *val = READ_ONCE_TASK_STACK(state->task, *(unsigned long *)addr);
-+      return true;
-+}
-+
-+#define REGS_SIZE (sizeof(struct pt_regs))
-+#define SP_OFFSET (offsetof(struct pt_regs, sp))
-+#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
-+#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
-+
-+static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
-+                           unsigned long *ip, unsigned long *sp, bool full)
-+{
-+      size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
-+      size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
-+      struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
-+
-+      if (IS_ENABLED(CONFIG_X86_64)) {
-+              if (!stack_access_ok(state, addr, regs_size))
-+                      return false;
-+
-+              *ip = regs->ip;
-+              *sp = regs->sp;
-+
-+              return true;
-+      }
-+
-+      if (!stack_access_ok(state, addr, sp_offset))
-+              return false;
-+
-+      *ip = regs->ip;
-+
-+      if (user_mode(regs)) {
-+              if (!stack_access_ok(state, addr + sp_offset,
-+                                   REGS_SIZE - SP_OFFSET))
-+                      return false;
-+
-+              *sp = regs->sp;
-+      } else
-+              *sp = (unsigned long)&regs->sp;
-+
-+      return true;
-+}
-+
-+bool unwind_next_frame(struct unwind_state *state)
-+{
-+      unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
-+      enum stack_type prev_type = state->stack_info.type;
-+      struct orc_entry *orc;
-+      struct pt_regs *ptregs;
-+      bool indirect = false;
-+
-+      if (unwind_done(state))
-+              return false;
-+
-+      /* Don't let modules unload while we're reading their ORC data. */
-+      preempt_disable();
-+
-+      /* Have we reached the end? */
-+      if (state->regs && user_mode(state->regs))
-+              goto done;
-+
-+      /*
-+       * Find the orc_entry associated with the text address.
-+       *
-+       * Decrement call return addresses by one so they work for sibling
-+       * calls and calls to noreturn functions.
-+       */
-+      orc = orc_find(state->signal ? state->ip : state->ip - 1);
-+      if (!orc || orc->sp_reg == ORC_REG_UNDEFINED)
-+              goto done;
-+      orig_ip = state->ip;
-+
-+      /* Find the previous frame's stack: */
-+      switch (orc->sp_reg) {
-+      case ORC_REG_SP:
-+              sp = state->sp + orc->sp_offset;
-+              break;
-+
-+      case ORC_REG_BP:
-+              sp = state->bp + orc->sp_offset;
-+              break;
-+
-+      case ORC_REG_SP_INDIRECT:
-+              sp = state->sp + orc->sp_offset;
-+              indirect = true;
-+              break;
-+
-+      case ORC_REG_BP_INDIRECT:
-+              sp = state->bp + orc->sp_offset;
-+              indirect = true;
-+              break;
-+
-+      case ORC_REG_R10:
-+              if (!state->regs || !state->full_regs) {
-+                      orc_warn("missing regs for base reg R10 at ip %p\n",
-+                               (void *)state->ip);
-+                      goto done;
-+              }
-+              sp = state->regs->r10;
-+              break;
-+
-+      case ORC_REG_R13:
-+              if (!state->regs || !state->full_regs) {
-+                      orc_warn("missing regs for base reg R13 at ip %p\n",
-+                               (void *)state->ip);
-+                      goto done;
-+              }
-+              sp = state->regs->r13;
-+              break;
-+
-+      case ORC_REG_DI:
-+              if (!state->regs || !state->full_regs) {
-+                      orc_warn("missing regs for base reg DI at ip %p\n",
-+                               (void *)state->ip);
-+                      goto done;
-+              }
-+              sp = state->regs->di;
-+              break;
-+
-+      case ORC_REG_DX:
-+              if (!state->regs || !state->full_regs) {
-+                      orc_warn("missing regs for base reg DX at ip %p\n",
-+                               (void *)state->ip);
-+                      goto done;
-+              }
-+              sp = state->regs->dx;
-+              break;
-+
-+      default:
-+              orc_warn("unknown SP base reg %d for ip %p\n",
-+                       orc->sp_reg, (void *)state->ip);
-+              goto done;
-+      }
-+
-+      if (indirect) {
-+              if (!deref_stack_reg(state, sp, &sp))
-+                      goto done;
-+      }
-+
-+      /* Find IP, SP and possibly regs: */
-+      switch (orc->type) {
-+      case ORC_TYPE_CALL:
-+              ip_p = sp - sizeof(long);
-+
-+              if (!deref_stack_reg(state, ip_p, &state->ip))
-+                      goto done;
-+
-+              state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
-+                                                state->ip, (void *)ip_p);
-+
-+              state->sp = sp;
-+              state->regs = NULL;
-+              state->signal = false;
-+              break;
-+
-+      case ORC_TYPE_REGS:
-+              if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
-+                      orc_warn("can't dereference registers at %p for ip %p\n",
-+                               (void *)sp, (void *)orig_ip);
-+                      goto done;
-+              }
-+
-+              state->regs = (struct pt_regs *)sp;
-+              state->full_regs = true;
-+              state->signal = true;
-+              break;
-+
-+      case ORC_TYPE_REGS_IRET:
-+              if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
-+                      orc_warn("can't dereference iret registers at %p for ip %p\n",
-+                               (void *)sp, (void *)orig_ip);
-+                      goto done;
-+              }
-+
-+              ptregs = container_of((void *)sp, struct pt_regs, ip);
-+              if ((unsigned long)ptregs >= prev_sp &&
-+                  on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
-+                      state->regs = ptregs;
-+                      state->full_regs = false;
-+              } else
-+                      state->regs = NULL;
-+
-+              state->signal = true;
-+              break;
-+
-+      default:
-+              orc_warn("unknown .orc_unwind entry type %d\n", orc->type);
-+              break;
-+      }
-+
-+      /* Find BP: */
-+      switch (orc->bp_reg) {
-+      case ORC_REG_UNDEFINED:
-+              if (state->regs && state->full_regs)
-+                      state->bp = state->regs->bp;
-+              break;
-+
-+      case ORC_REG_PREV_SP:
-+              if (!deref_stack_reg(state, sp + orc->bp_offset, &state->bp))
-+                      goto done;
-+              break;
-+
-+      case ORC_REG_BP:
-+              if (!deref_stack_reg(state, state->bp + orc->bp_offset, &state->bp))
-+                      goto done;
-+              break;
-+
-+      default:
-+              orc_warn("unknown BP base reg %d for ip %p\n",
-+                       orc->bp_reg, (void *)orig_ip);
-+              goto done;
-+      }
-+
-+      /* Prevent a recursive loop due to bad ORC data: */
-+      if (state->stack_info.type == prev_type &&
-+          on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
-+          state->sp <= prev_sp) {
-+              orc_warn("stack going in the wrong direction? ip=%p\n",
-+                       (void *)orig_ip);
-+              goto done;
-+      }
-+
-+      preempt_enable();
-+      return true;
-+
-+done:
-+      preempt_enable();
-+      state->stack_info.type = STACK_TYPE_UNKNOWN;
-+      return false;
-+}
-+EXPORT_SYMBOL_GPL(unwind_next_frame);
-+
-+void __unwind_start(struct unwind_state *state, struct task_struct *task,
-+                  struct pt_regs *regs, unsigned long *first_frame)
-+{
-+      memset(state, 0, sizeof(*state));
-+      state->task = task;
-+
-+      /*
-+       * Refuse to unwind the stack of a task while it's executing on another
-+       * CPU.  This check is racy, but that's ok: the unwinder has other
-+       * checks to prevent it from going off the rails.
-+       */
-+      if (task_on_another_cpu(task))
-+              goto done;
-+
-+      if (regs) {
-+              if (user_mode(regs))
-+                      goto done;
-+
-+              state->ip = regs->ip;
-+              state->sp = kernel_stack_pointer(regs);
-+              state->bp = regs->bp;
-+              state->regs = regs;
-+              state->full_regs = true;
-+              state->signal = true;
-+
-+      } else if (task == current) {
-+              asm volatile("lea (%%rip), %0\n\t"
-+                           "mov %%rsp, %1\n\t"
-+                           "mov %%rbp, %2\n\t"
-+                           : "=r" (state->ip), "=r" (state->sp),
-+                             "=r" (state->bp));
-+
-+      } else {
-+              struct inactive_task_frame *frame = (void *)task->thread.sp;
-+
-+              state->sp = task->thread.sp;
-+              state->bp = READ_ONCE_NOCHECK(frame->bp);
-+              state->ip = READ_ONCE_NOCHECK(frame->ret_addr);
-+      }
-+
-+      if (get_stack_info((unsigned long *)state->sp, state->task,
-+                         &state->stack_info, &state->stack_mask))
-+              return;
-+
-+      /*
-+       * The caller can provide the address of the first frame directly
-+       * (first_frame) or indirectly (regs->sp) to indicate which stack frame
-+       * to start unwinding at.  Skip ahead until we reach it.
-+       */
-+
-+      /* When starting from regs, skip the regs frame: */
-+      if (regs) {
-+              unwind_next_frame(state);
-+              return;
-+      }
-+
-+      /* Otherwise, skip ahead to the user-specified starting frame: */
-+      while (!unwind_done(state) &&
-+             (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
-+                      state->sp <= (unsigned long)first_frame))
-+              unwind_next_frame(state);
-+
-+      return;
-+
-+done:
-+      state->stack_info.type = STACK_TYPE_UNKNOWN;
-+      return;
-+}
-+EXPORT_SYMBOL_GPL(__unwind_start);
-diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
-index 658fcf67862c..d6f45f6d1054 100644
---- a/arch/x86/Kconfig
-+++ b/arch/x86/Kconfig
-@@ -158,6 +158,7 @@ config X86
-       select HAVE_MEMBLOCK
-       select HAVE_MEMBLOCK_NODE_MAP
-       select HAVE_MIXED_BREAKPOINTS_REGS
-+      select HAVE_MOD_ARCH_SPECIFIC
-       select HAVE_NMI
-       select HAVE_OPROFILE
-       select HAVE_OPTPROBES
-diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
-index 1fc519f3c49e..d5bca2ec8a74 100644
---- a/arch/x86/Kconfig.debug
-+++ b/arch/x86/Kconfig.debug
-@@ -356,4 +356,29 @@ config PUNIT_ATOM_DEBUG
-         The current power state can be read from
-         /sys/kernel/debug/punit_atom/dev_power_state
- 
-+config ORC_UNWINDER
-+      bool "ORC unwinder"
-+      depends on X86_64
-+      select STACK_VALIDATION
-+      ---help---
-+        This option enables the ORC (Oops Rewind Capability) unwinder for
-+        unwinding kernel stack traces.  It uses a custom data format which is
-+        a simplified version of the DWARF Call Frame Information standard.
-+
-+        This unwinder is more accurate across interrupt entry frames than the
-+        frame pointer unwinder.  It can also enable a 5-10% performance
-+        improvement across the entire kernel if CONFIG_FRAME_POINTER is
-+        disabled.
-+
-+        Enabling this option will increase the kernel's runtime memory usage
-+        by roughly 2-4MB, depending on your kernel config.
-+
-+config FRAME_POINTER_UNWINDER
-+      def_bool y
-+      depends on !ORC_UNWINDER && FRAME_POINTER
-+
-+config GUESS_UNWINDER
-+      def_bool y
-+      depends on !ORC_UNWINDER && !FRAME_POINTER
-+
- endmenu
-diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
-index c8a3b61be0aa..f05f00acac89 100644
---- a/arch/x86/kernel/vmlinux.lds.S
-+++ b/arch/x86/kernel/vmlinux.lds.S
-@@ -24,6 +24,7 @@
- #include <asm/asm-offsets.h>
- #include <asm/thread_info.h>
- #include <asm/page_types.h>
-+#include <asm/orc_lookup.h>
- #include <asm/cache.h>
- #include <asm/boot.h>
- 
-@@ -148,6 +149,8 @@ SECTIONS
- 
-       BUG_TABLE
- 
-+      ORC_UNWIND_TABLE
-+
-       . = ALIGN(PAGE_SIZE);
-       __vvar_page = .;
- 
-diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
-index c617b9d1d6cb..0b4d1b3880b0 100644
---- a/lib/Kconfig.debug
-+++ b/lib/Kconfig.debug
-@@ -374,6 +374,9 @@ config STACK_VALIDATION
-         pointers (if CONFIG_FRAME_POINTER is enabled).  This helps ensure
-         that runtime stack traces are more reliable.
- 
-+        This is also a prerequisite for generation of ORC unwind data, which
-+        is needed for CONFIG_ORC_UNWINDER.
-+
-         For more information, see
-         tools/objtool/Documentation/stack-validation.txt.
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0038-x86-kconfig-Consolidate-unwinders-into-multiple-choi.patch b/patches/kernel/0038-x86-kconfig-Consolidate-unwinders-into-multiple-choi.patch

deleted file mode 100644 (file)

index f588b6e..0000000
--- a/patches/kernel/0038-x86-kconfig-Consolidate-unwinders-into-multiple-choi.patch
+++ /dev/null
@@ -1,171 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Tue, 25 Jul 2017 08:54:24 -0500
-Subject: [PATCH] x86/kconfig: Consolidate unwinders into multiple choice
- selection
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-There are three mutually exclusive unwinders.  Make that more obvious by
-combining them into a multiple-choice selection:
-
-  CONFIG_FRAME_POINTER_UNWINDER
-  CONFIG_ORC_UNWINDER
-  CONFIG_GUESS_UNWINDER (if CONFIG_EXPERT=y)
-
-Frame pointers are still the default (for now).
-
-The old CONFIG_FRAME_POINTER option is still used in some
-arch-independent places, so keep it around, but make it
-invisible to the user on x86 - it's now selected by
-CONFIG_FRAME_POINTER_UNWINDER=y.
-
-Suggested-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mike Galbraith <efault@gmx.de>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: live-patching@vger.kernel.org
-Link: http://lkml.kernel.org/r/20170725135424.zukjmgpz3plf5pmt@treble
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 81d387190039c14edac8de2b3ec789beb899afd9)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 26ddacc1e6333555e4a6bd63c4c935b323509f92)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/unwind.h |  4 ++--
- arch/x86/Kconfig              |  3 +--
- arch/x86/Kconfig.debug        | 45 +++++++++++++++++++++++++++++++++++++------
- arch/x86/configs/tiny.config  |  2 ++
- 4 files changed, 44 insertions(+), 10 deletions(-)
-
-diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
-index 25b8d31a007d..e9f793e2df7a 100644
---- a/arch/x86/include/asm/unwind.h
-+++ b/arch/x86/include/asm/unwind.h
-@@ -16,7 +16,7 @@ struct unwind_state {
-       bool signal, full_regs;
-       unsigned long sp, bp, ip;
-       struct pt_regs *regs;
--#elif defined(CONFIG_FRAME_POINTER)
-+#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
-       bool got_irq;
-       unsigned long *bp, *orig_sp, ip;
-       struct pt_regs *regs;
-@@ -50,7 +50,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
-       __unwind_start(state, task, regs, first_frame);
- }
- 
--#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER)
-+#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
- static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
- {
-       if (unwind_done(state))
-diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
-index d6f45f6d1054..3a0b8cb57caf 100644
---- a/arch/x86/Kconfig
-+++ b/arch/x86/Kconfig
-@@ -73,7 +73,6 @@ config X86
-       select ARCH_USE_QUEUED_RWLOCKS
-       select ARCH_USE_QUEUED_SPINLOCKS
-       select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
--      select ARCH_WANT_FRAME_POINTERS
-       select ARCH_WANTS_DYNAMIC_TASK_STRUCT
-       select ARCH_WANTS_THP_SWAP              if X86_64
-       select BUILDTIME_EXTABLE_SORT
-@@ -169,7 +168,7 @@ config X86
-       select HAVE_PERF_REGS
-       select HAVE_PERF_USER_STACK_DUMP
-       select HAVE_REGS_AND_STACK_ACCESS_API
--      select HAVE_RELIABLE_STACKTRACE         if X86_64 && FRAME_POINTER && STACK_VALIDATION
-+      select HAVE_RELIABLE_STACKTRACE         if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
-       select HAVE_STACK_VALIDATION            if X86_64
-       select HAVE_SYSCALL_TRACEPOINTS
-       select HAVE_UNSTABLE_SCHED_CLOCK
-diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
-index d5bca2ec8a74..c441b5d65ec8 100644
---- a/arch/x86/Kconfig.debug
-+++ b/arch/x86/Kconfig.debug
-@@ -356,6 +356,29 @@ config PUNIT_ATOM_DEBUG
-         The current power state can be read from
-         /sys/kernel/debug/punit_atom/dev_power_state
- 
-+choice
-+      prompt "Choose kernel unwinder"
-+      default FRAME_POINTER_UNWINDER
-+      ---help---
-+        This determines which method will be used for unwinding kernel stack
-+        traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
-+        livepatch, lockdep, and more.
-+
-+config FRAME_POINTER_UNWINDER
-+      bool "Frame pointer unwinder"
-+      select FRAME_POINTER
-+      ---help---
-+        This option enables the frame pointer unwinder for unwinding kernel
-+        stack traces.
-+
-+        The unwinder itself is fast and it uses less RAM than the ORC
-+        unwinder, but the kernel text size will grow by ~3% and the kernel's
-+        overall performance will degrade by roughly 5-10%.
-+
-+        This option is recommended if you want to use the livepatch
-+        consistency model, as this is currently the only way to get a
-+        reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
-+
- config ORC_UNWINDER
-       bool "ORC unwinder"
-       depends on X86_64
-@@ -373,12 +396,22 @@ config ORC_UNWINDER
-         Enabling this option will increase the kernel's runtime memory usage
-         by roughly 2-4MB, depending on your kernel config.
- 
--config FRAME_POINTER_UNWINDER
--      def_bool y
--      depends on !ORC_UNWINDER && FRAME_POINTER
--
- config GUESS_UNWINDER
--      def_bool y
--      depends on !ORC_UNWINDER && !FRAME_POINTER
-+      bool "Guess unwinder"
-+      depends on EXPERT
-+      ---help---
-+        This option enables the "guess" unwinder for unwinding kernel stack
-+        traces.  It scans the stack and reports every kernel text address it
-+        finds.  Some of the addresses it reports may be incorrect.
-+
-+        While this option often produces false positives, it can still be
-+        useful in many cases.  Unlike the other unwinders, it has no runtime
-+        overhead.
-+
-+endchoice
-+
-+config FRAME_POINTER
-+      depends on !ORC_UNWINDER && !GUESS_UNWINDER
-+      bool
- 
- endmenu
-diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
-index 4b429df40d7a..550cd5012b73 100644
---- a/arch/x86/configs/tiny.config
-+++ b/arch/x86/configs/tiny.config
-@@ -1,3 +1,5 @@
- CONFIG_NOHIGHMEM=y
- # CONFIG_HIGHMEM4G is not set
- # CONFIG_HIGHMEM64G is not set
-+CONFIG_GUESS_UNWINDER=y
-+# CONFIG_FRAME_POINTER_UNWINDER is not set
--- 
-2.14.2
-
diff --git a/patches/kernel/0038-x86-unwind-Add-the-ORC-unwinder.patch b/patches/kernel/0038-x86-unwind-Add-the-ORC-unwinder.patch

new file mode 100644 (file)

index 0000000..b8f0318
--- /dev/null
+++ b/patches/kernel/0038-x86-unwind-Add-the-ORC-unwinder.patch
@@ -0,0 +1,1407 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 24 Jul 2017 18:36:57 -0500
+Subject: [PATCH] x86/unwind: Add the ORC unwinder
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Add the new ORC unwinder which is enabled by CONFIG_ORC_UNWINDER=y.
+It plugs into the existing x86 unwinder framework.
+
+It relies on objtool to generate the needed .orc_unwind and
+.orc_unwind_ip sections.
+
+For more details on why ORC is used instead of DWARF, see
+Documentation/x86/orc-unwinder.txt - but the short version is
+that it's a simplified, fundamentally more robust debuginfo
+data structure, which also allows up to two orders of magnitude
+faster lookups than the DWARF unwinder - which matters to
+profiling workloads like perf.
+
+Thanks to Andy Lutomirski for the performance improvement ideas:
+splitting the ORC unwind table into two parallel arrays and creating a
+fast lookup table to search a subset of the unwind table.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: live-patching@vger.kernel.org
+Link: http://lkml.kernel.org/r/0a6cbfb40f8da99b7a45a1a8302dc6aef16ec812.1500938583.git.jpoimboe@redhat.com
+[ Extended the changelog. ]
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit ee9f8fce99640811b2b8e79d0d1dbe8bab69ba67)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit dccbf63d7a6cc431af23a86e28275a74904545cd)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/x86/orc-unwinder.txt | 179 ++++++++++++
+ arch/x86/kernel/Makefile           |   8 +-
+ scripts/Makefile.build             |  14 +-
+ arch/um/include/asm/unwind.h       |   8 +
+ arch/x86/include/asm/module.h      |   9 +
+ arch/x86/include/asm/orc_lookup.h  |  46 +++
+ arch/x86/include/asm/orc_types.h   |   2 +-
+ arch/x86/include/asm/unwind.h      |  76 +++--
+ include/asm-generic/vmlinux.lds.h  |  27 +-
+ arch/x86/kernel/module.c           |  11 +-
+ arch/x86/kernel/setup.c            |   3 +
+ arch/x86/kernel/unwind_frame.c     |  39 +--
+ arch/x86/kernel/unwind_guess.c     |   5 +
+ arch/x86/kernel/unwind_orc.c       | 582 +++++++++++++++++++++++++++++++++++++
+ arch/x86/Kconfig                   |   1 +
+ arch/x86/Kconfig.debug             |  25 ++
+ arch/x86/kernel/vmlinux.lds.S      |   3 +
+ lib/Kconfig.debug                  |   3 +
+ 18 files changed, 977 insertions(+), 64 deletions(-)
+ create mode 100644 Documentation/x86/orc-unwinder.txt
+ create mode 100644 arch/um/include/asm/unwind.h
+ create mode 100644 arch/x86/include/asm/orc_lookup.h
+ create mode 100644 arch/x86/kernel/unwind_orc.c
+
+diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt
+new file mode 100644
+index 000000000000..af0c9a4c65a6
+--- /dev/null
++++ b/Documentation/x86/orc-unwinder.txt
+@@ -0,0 +1,179 @@
++ORC unwinder
++============
++
++Overview
++--------
++
++The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is
++similar in concept to a DWARF unwinder.  The difference is that the
++format of the ORC data is much simpler than DWARF, which in turn allows
++the ORC unwinder to be much simpler and faster.
++
++The ORC data consists of unwind tables which are generated by objtool.
++They contain out-of-band data which is used by the in-kernel ORC
++unwinder.  Objtool generates the ORC data by first doing compile-time
++stack metadata validation (CONFIG_STACK_VALIDATION).  After analyzing
++all the code paths of a .o file, it determines information about the
++stack state at each instruction address in the file and outputs that
++information to the .orc_unwind and .orc_unwind_ip sections.
++
++The per-object ORC sections are combined at link time and are sorted and
++post-processed at boot time.  The unwinder uses the resulting data to
++correlate instruction addresses with their stack states at run time.
++
++
++ORC vs frame pointers
++---------------------
++
++With frame pointers enabled, GCC adds instrumentation code to every
++function in the kernel.  The kernel's .text size increases by about
++3.2%, resulting in a broad kernel-wide slowdown.  Measurements by Mel
++Gorman [1] have shown a slowdown of 5-10% for some workloads.
++
++In contrast, the ORC unwinder has no effect on text size or runtime
++performance, because the debuginfo is out of band.  So if you disable
++frame pointers and enable the ORC unwinder, you get a nice performance
++improvement across the board, and still have reliable stack traces.
++
++Ingo Molnar says:
++
++  "Note that it's not just a performance improvement, but also an
++  instruction cache locality improvement: 3.2% .text savings almost
++  directly transform into a similarly sized reduction in cache
++  footprint. That can transform to even higher speedups for workloads
++  whose cache locality is borderline."
++
++Another benefit of ORC compared to frame pointers is that it can
++reliably unwind across interrupts and exceptions.  Frame pointer based
++unwinds can sometimes skip the caller of the interrupted function, if it
++was a leaf function or if the interrupt hit before the frame pointer was
++saved.
++
++The main disadvantage of the ORC unwinder compared to frame pointers is
++that it needs more memory to store the ORC unwind tables: roughly 2-4MB
++depending on the kernel config.
++
++
++ORC vs DWARF
++------------
++
++ORC debuginfo's advantage over DWARF itself is that it's much simpler.
++It gets rid of the complex DWARF CFI state machine and also gets rid of
++the tracking of unnecessary registers.  This allows the unwinder to be
++much simpler, meaning fewer bugs, which is especially important for
++mission critical oops code.
++
++The simpler debuginfo format also enables the unwinder to be much faster
++than DWARF, which is important for perf and lockdep.  In a basic
++performance test by Jiri Slaby [2], the ORC unwinder was about 20x
++faster than an out-of-tree DWARF unwinder.  (Note: That measurement was
++taken before some performance tweaks were added, which doubled
++performance, so the speedup over DWARF may be closer to 40x.)
++
++The ORC data format does have a few downsides compared to DWARF.  ORC
++unwind tables take up ~50% more RAM (+1.3MB on an x86 defconfig kernel)
++than DWARF-based eh_frame tables.
++
++Another potential downside is that, as GCC evolves, it's conceivable
++that the ORC data may end up being *too* simple to describe the state of
++the stack for certain optimizations.  But IMO this is unlikely because
++GCC saves the frame pointer for any unusual stack adjustments it does,
++so I suspect we'll really only ever need to keep track of the stack
++pointer and the frame pointer between call frames.  But even if we do
++end up having to track all the registers DWARF tracks, at least we will
++still be able to control the format, e.g. no complex state machines.
++
++
++ORC unwind table generation
++---------------------------
++
++The ORC data is generated by objtool.  With the existing compile-time
++stack metadata validation feature, objtool already follows all code
++paths, and so it already has all the information it needs to be able to
++generate ORC data from scratch.  So it's an easy step to go from stack
++validation to ORC data generation.
++
++It should be possible to instead generate the ORC data with a simple
++tool which converts DWARF to ORC data.  However, such a solution would
++be incomplete due to the kernel's extensive use of asm, inline asm, and
++special sections like exception tables.
++
++That could be rectified by manually annotating those special code paths
++using GNU assembler .cfi annotations in .S files, and homegrown
++annotations for inline asm in .c files.  But asm annotations were tried
++in the past and were found to be unmaintainable.  They were often
++incorrect/incomplete and made the code harder to read and keep updated.
++And based on looking at glibc code, annotating inline asm in .c files
++might be even worse.
++
++Objtool still needs a few annotations, but only in code which does
++unusual things to the stack like entry code.  And even then, far fewer
++annotations are needed than what DWARF would need, so they're much more
++maintainable than DWARF CFI annotations.
++
++So the advantages of using objtool to generate ORC data are that it
++gives more accurate debuginfo, with very few annotations.  It also
++insulates the kernel from toolchain bugs which can be very painful to
++deal with in the kernel since we often have to work around issues in
++older versions of the toolchain for years.
++
++The downside is that the unwinder now becomes dependent on objtool's
++ability to reverse engineer GCC code flow.  If GCC optimizations become
++too complicated for objtool to follow, the ORC data generation might
++stop working or become incomplete.  (It's worth noting that livepatch
++already has such a dependency on objtool's ability to follow GCC code
++flow.)
++
++If newer versions of GCC come up with some optimizations which break
++objtool, we may need to revisit the current implementation.  Some
++possible solutions would be asking GCC to make the optimizations more
++palatable, or having objtool use DWARF as an additional input, or
++creating a GCC plugin to assist objtool with its analysis.  But for now,
++objtool follows GCC code quite well.
++
++
++Unwinder implementation details
++-------------------------------
++
++Objtool generates the ORC data by integrating with the compile-time
++stack metadata validation feature, which is described in detail in
++tools/objtool/Documentation/stack-validation.txt.  After analyzing all
++the code paths of a .o file, it creates an array of orc_entry structs,
++and a parallel array of instruction addresses associated with those
++structs, and writes them to the .orc_unwind and .orc_unwind_ip sections
++respectively.
++
++The ORC data is split into the two arrays for performance reasons, to
++make the searchable part of the data (.orc_unwind_ip) more compact.  The
++arrays are sorted in parallel at boot time.
++
++Performance is further improved by the use of a fast lookup table which
++is created at runtime.  The fast lookup table associates a given address
++with a range of indices for the .orc_unwind table, so that only a small
++subset of the table needs to be searched.
++
++
++Etymology
++---------
++
++Orcs, fearsome creatures of medieval folklore, are the Dwarves' natural
++enemies.  Similarly, the ORC unwinder was created in opposition to the
++complexity and slowness of DWARF.
++
++"Although Orcs rarely consider multiple solutions to a problem, they do
++excel at getting things done because they are creatures of action, not
++thought." [3]  Similarly, unlike the esoteric DWARF unwinder, the
++veracious ORC unwinder wastes no time or siliconic effort decoding
++variable-length zero-extended unsigned-integer byte-coded
++state-machine-based debug information entries.
++
++Similar to how Orcs frequently unravel the well-intentioned plans of
++their adversaries, the ORC unwinder frequently unravels stacks with
++brutal, unyielding efficiency.
++
++ORC stands for Oops Rewind Capability.
++
++
++[1] https://lkml.kernel.org/r/20170602104048.jkkzssljsompjdwy@suse.de
++[2] https://lkml.kernel.org/r/d2ca5435-6386-29b8-db87-7f227c2b713a@suse.cz
++[3] http://dustin.wikidot.com/half-orcs-and-orcs
+diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
+index a01892bdd61a..287eac7d207f 100644
+--- a/arch/x86/kernel/Makefile
++++ b/arch/x86/kernel/Makefile
+@@ -126,11 +126,9 @@ obj-$(CONFIG_PERF_EVENTS)         += perf_regs.o
+ obj-$(CONFIG_TRACING)                 += tracepoint.o
+ obj-$(CONFIG_SCHED_MC_PRIO)           += itmt.o
+ 
+-ifdef CONFIG_FRAME_POINTER
+-obj-y                                 += unwind_frame.o
+-else
+-obj-y                                 += unwind_guess.o
+-endif
++obj-$(CONFIG_ORC_UNWINDER)            += unwind_orc.o
++obj-$(CONFIG_FRAME_POINTER_UNWINDER)  += unwind_frame.o
++obj-$(CONFIG_GUESS_UNWINDER)          += unwind_guess.o
+ 
+ ###
+ # 64 bit specific files
+diff --git a/scripts/Makefile.build b/scripts/Makefile.build
+index 273bc2228307..ab2c8ef43cdb 100644
+--- a/scripts/Makefile.build
++++ b/scripts/Makefile.build
+@@ -258,7 +258,8 @@ ifneq ($(SKIP_STACK_VALIDATION),1)
+ 
+ __objtool_obj := $(objtree)/tools/objtool/objtool
+ 
+-objtool_args = check
++objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check)
++
+ ifndef CONFIG_FRAME_POINTER
+ objtool_args += --no-fp
+ endif
+@@ -276,6 +277,11 @@ objtool_obj = $(if $(patsubst y%,, \
+ endif # SKIP_STACK_VALIDATION
+ endif # CONFIG_STACK_VALIDATION
+ 
++# Rebuild all objects when objtool changes, or is enabled/disabled.
++objtool_dep = $(objtool_obj)                                  \
++            $(wildcard include/config/orc/unwinder.h          \
++                       include/config/stack/validation.h)
++
+ define rule_cc_o_c
+       $(call echo-cmd,checksrc) $(cmd_checksrc)                         \
+       $(call cmd_and_fixdep,cc_o_c)                                     \
+@@ -298,14 +304,14 @@ cmd_undef_syms = echo
+ endif
+ 
+ # Built-in and composite module parts
+-$(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_obj) FORCE
++$(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE
+       $(call cmd,force_checksrc)
+       $(call cmd,force_check_kmsg)
+       $(call if_changed_rule,cc_o_c)
+ 
+ # Single-part modules are special since we need to mark them in $(MODVERDIR)
+ 
+-$(single-used-m): $(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_obj) FORCE
++$(single-used-m): $(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE
+       $(call cmd,force_checksrc)
+       $(call cmd,force_check_kmsg)
+       $(call if_changed_rule,cc_o_c)
+@@ -401,7 +407,7 @@ cmd_modversions_S =                                                                \
+ endif
+ endif
+ 
+-$(obj)/%.o: $(src)/%.S $(objtool_obj) FORCE
++$(obj)/%.o: $(src)/%.S $(objtool_dep) FORCE
+       $(call if_changed_rule,as_o_S)
+ 
+ targets += $(real-objs-y) $(real-objs-m) $(lib-y)
+diff --git a/arch/um/include/asm/unwind.h b/arch/um/include/asm/unwind.h
+new file mode 100644
+index 000000000000..7ffa5437b761
+--- /dev/null
++++ b/arch/um/include/asm/unwind.h
+@@ -0,0 +1,8 @@
++#ifndef _ASM_UML_UNWIND_H
++#define _ASM_UML_UNWIND_H
++
++static inline void
++unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
++                 void *orc, size_t orc_size) {}
++
++#endif /* _ASM_UML_UNWIND_H */
+diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
+index e3b7819caeef..9eb7c718aaf8 100644
+--- a/arch/x86/include/asm/module.h
++++ b/arch/x86/include/asm/module.h
+@@ -2,6 +2,15 @@
+ #define _ASM_X86_MODULE_H
+ 
+ #include <asm-generic/module.h>
++#include <asm/orc_types.h>
++
++struct mod_arch_specific {
++#ifdef CONFIG_ORC_UNWINDER
++      unsigned int num_orcs;
++      int *orc_unwind_ip;
++      struct orc_entry *orc_unwind;
++#endif
++};
+ 
+ #ifdef CONFIG_X86_64
+ /* X86_64 does not define MODULE_PROC_FAMILY */
+diff --git a/arch/x86/include/asm/orc_lookup.h b/arch/x86/include/asm/orc_lookup.h
+new file mode 100644
+index 000000000000..91c8d868424d
+--- /dev/null
++++ b/arch/x86/include/asm/orc_lookup.h
+@@ -0,0 +1,46 @@
++/*
++ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version 2
++ * of the License, or (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, see <http://www.gnu.org/licenses/>.
++ */
++#ifndef _ORC_LOOKUP_H
++#define _ORC_LOOKUP_H
++
++/*
++ * This is a lookup table for speeding up access to the .orc_unwind table.
++ * Given an input address offset, the corresponding lookup table entry
++ * specifies a subset of the .orc_unwind table to search.
++ *
++ * Each block represents the end of the previous range and the start of the
++ * next range.  An extra block is added to give the last range an end.
++ *
++ * The block size should be a power of 2 to avoid a costly 'div' instruction.
++ *
++ * A block size of 256 was chosen because it roughly doubles unwinder
++ * performance while only adding ~5% to the ORC data footprint.
++ */
++#define LOOKUP_BLOCK_ORDER    8
++#define LOOKUP_BLOCK_SIZE     (1 << LOOKUP_BLOCK_ORDER)
++
++#ifndef LINKER_SCRIPT
++
++extern unsigned int orc_lookup[];
++extern unsigned int orc_lookup_end[];
++
++#define LOOKUP_START_IP               (unsigned long)_stext
++#define LOOKUP_STOP_IP                (unsigned long)_etext
++
++#endif /* LINKER_SCRIPT */
++
++#endif /* _ORC_LOOKUP_H */
+diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
+index 7dc777a6cb40..9c9dc579bd7d 100644
+--- a/arch/x86/include/asm/orc_types.h
++++ b/arch/x86/include/asm/orc_types.h
+@@ -88,7 +88,7 @@ struct orc_entry {
+       unsigned        sp_reg:4;
+       unsigned        bp_reg:4;
+       unsigned        type:2;
+-};
++} __packed;
+ 
+ /*
+  * This struct is used by asm and inline asm code to manually annotate the
+diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
+index e6676495b125..25b8d31a007d 100644
+--- a/arch/x86/include/asm/unwind.h
++++ b/arch/x86/include/asm/unwind.h
+@@ -12,11 +12,14 @@ struct unwind_state {
+       struct task_struct *task;
+       int graph_idx;
+       bool error;
+-#ifdef CONFIG_FRAME_POINTER
++#if defined(CONFIG_ORC_UNWINDER)
++      bool signal, full_regs;
++      unsigned long sp, bp, ip;
++      struct pt_regs *regs;
++#elif defined(CONFIG_FRAME_POINTER)
+       bool got_irq;
+-      unsigned long *bp, *orig_sp;
++      unsigned long *bp, *orig_sp, ip;
+       struct pt_regs *regs;
+-      unsigned long ip;
+ #else
+       unsigned long *sp;
+ #endif
+@@ -24,41 +27,30 @@ struct unwind_state {
+ 
+ void __unwind_start(struct unwind_state *state, struct task_struct *task,
+                   struct pt_regs *regs, unsigned long *first_frame);
+-
+ bool unwind_next_frame(struct unwind_state *state);
+-
+ unsigned long unwind_get_return_address(struct unwind_state *state);
++unsigned long *unwind_get_return_address_ptr(struct unwind_state *state);
+ 
+ static inline bool unwind_done(struct unwind_state *state)
+ {
+       return state->stack_info.type == STACK_TYPE_UNKNOWN;
+ }
+ 
+-static inline
+-void unwind_start(struct unwind_state *state, struct task_struct *task,
+-                struct pt_regs *regs, unsigned long *first_frame)
+-{
+-      first_frame = first_frame ? : get_stack_pointer(task, regs);
+-
+-      __unwind_start(state, task, regs, first_frame);
+-}
+-
+ static inline bool unwind_error(struct unwind_state *state)
+ {
+       return state->error;
+ }
+ 
+-#ifdef CONFIG_FRAME_POINTER
+-
+ static inline
+-unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
++void unwind_start(struct unwind_state *state, struct task_struct *task,
++                struct pt_regs *regs, unsigned long *first_frame)
+ {
+-      if (unwind_done(state))
+-              return NULL;
++      first_frame = first_frame ? : get_stack_pointer(task, regs);
+ 
+-      return state->regs ? &state->regs->ip : state->bp + 1;
++      __unwind_start(state, task, regs, first_frame);
+ }
+ 
++#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER)
+ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+ {
+       if (unwind_done(state))
+@@ -66,20 +58,46 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+ 
+       return state->regs;
+ }
+-
+-#else /* !CONFIG_FRAME_POINTER */
+-
+-static inline
+-unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
++#else
++static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+ {
+       return NULL;
+ }
++#endif
+ 
+-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
++#ifdef CONFIG_ORC_UNWINDER
++void unwind_init(void);
++void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
++                      void *orc, size_t orc_size);
++#else
++static inline void unwind_init(void) {}
++static inline
++void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
++                      void *orc, size_t orc_size) {}
++#endif
++
++/*
++ * This disables KASAN checking when reading a value from another task's stack,
++ * since the other task could be running on another CPU and could have poisoned
++ * the stack in the meantime.
++ */
++#define READ_ONCE_TASK_STACK(task, x)                 \
++({                                                    \
++      unsigned long val;                              \
++      if (task == current)                            \
++              val = READ_ONCE(x);                     \
++      else                                            \
++              val = READ_ONCE_NOCHECK(x);             \
++      val;                                            \
++})
++
++static inline bool task_on_another_cpu(struct task_struct *task)
+ {
+-      return NULL;
++#ifdef CONFIG_SMP
++      return task != current && task->on_cpu;
++#else
++      return false;
++#endif
+ }
+ 
+-#endif /* CONFIG_FRAME_POINTER */
+-
+ #endif /* _ASM_X86_UNWIND_H */
+diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
+index e7e955d4ab9e..9fdb54a95976 100644
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -686,6 +686,31 @@
+ #define BUG_TABLE
+ #endif
+ 
++#ifdef CONFIG_ORC_UNWINDER
++#define ORC_UNWIND_TABLE                                              \
++      . = ALIGN(4);                                                   \
++      .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) {       \
++              VMLINUX_SYMBOL(__start_orc_unwind_ip) = .;              \
++              KEEP(*(.orc_unwind_ip))                                 \
++              VMLINUX_SYMBOL(__stop_orc_unwind_ip) = .;               \
++      }                                                               \
++      . = ALIGN(6);                                                   \
++      .orc_unwind : AT(ADDR(.orc_unwind) - LOAD_OFFSET) {             \
++              VMLINUX_SYMBOL(__start_orc_unwind) = .;                 \
++              KEEP(*(.orc_unwind))                                    \
++              VMLINUX_SYMBOL(__stop_orc_unwind) = .;                  \
++      }                                                               \
++      . = ALIGN(4);                                                   \
++      .orc_lookup : AT(ADDR(.orc_lookup) - LOAD_OFFSET) {             \
++              VMLINUX_SYMBOL(orc_lookup) = .;                         \
++              . += (((SIZEOF(.text) + LOOKUP_BLOCK_SIZE - 1) /        \
++                      LOOKUP_BLOCK_SIZE) + 1) * 4;                    \
++              VMLINUX_SYMBOL(orc_lookup_end) = .;                     \
++      }
++#else
++#define ORC_UNWIND_TABLE
++#endif
++
+ #ifdef CONFIG_PM_TRACE
+ #define TRACEDATA                                                     \
+       . = ALIGN(4);                                                   \
+@@ -872,7 +897,7 @@
+               DATA_DATA                                               \
+               CONSTRUCTORS                                            \
+       }                                                               \
+-      BUG_TABLE
++      BUG_TABLE                                                       \
+ 
+ #define INIT_TEXT_SECTION(inittext_align)                             \
+       . = ALIGN(inittext_align);                                      \
+diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
+index f67bd3205df7..62e7d70aadd5 100644
+--- a/arch/x86/kernel/module.c
++++ b/arch/x86/kernel/module.c
+@@ -35,6 +35,7 @@
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+ #include <asm/setup.h>
++#include <asm/unwind.h>
+ 
+ #if 0
+ #define DEBUGP(fmt, ...)                              \
+@@ -213,7 +214,7 @@ int module_finalize(const Elf_Ehdr *hdr,
+                   struct module *me)
+ {
+       const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+-              *para = NULL;
++              *para = NULL, *orc = NULL, *orc_ip = NULL;
+       char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+ 
+       for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+@@ -225,6 +226,10 @@ int module_finalize(const Elf_Ehdr *hdr,
+                       locks = s;
+               if (!strcmp(".parainstructions", secstrings + s->sh_name))
+                       para = s;
++              if (!strcmp(".orc_unwind", secstrings + s->sh_name))
++                      orc = s;
++              if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
++                      orc_ip = s;
+       }
+ 
+       if (alt) {
+@@ -248,6 +253,10 @@ int module_finalize(const Elf_Ehdr *hdr,
+       /* make jump label nops */
+       jump_label_apply_nops(me);
+ 
++      if (orc && orc_ip)
++              unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size,
++                                 (void *)orc->sh_addr, orc->sh_size);
++
+       return 0;
+ }
+ 
+diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
+index f964bfddfefd..dd6e8707e969 100644
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -121,6 +121,7 @@
+ #include <asm/microcode.h>
+ #include <asm/mmu_context.h>
+ #include <asm/kaslr.h>
++#include <asm/unwind.h>
+ 
+ /*
+  * max_low_pfn_mapped: highest direct mapped pfn under 4GB
+@@ -1325,6 +1326,8 @@ void __init setup_arch(char **cmdline_p)
+       if (efi_enabled(EFI_BOOT))
+               efi_apply_memmap_quirks();
+ #endif
++
++      unwind_init();
+ }
+ 
+ #ifdef CONFIG_X86_32
+diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
+index c29e5bc7e9c9..d145a0b1f529 100644
+--- a/arch/x86/kernel/unwind_frame.c
++++ b/arch/x86/kernel/unwind_frame.c
+@@ -10,20 +10,22 @@
+ 
+ #define FRAME_HEADER_SIZE (sizeof(long) * 2)
+ 
+-/*
+- * This disables KASAN checking when reading a value from another task's stack,
+- * since the other task could be running on another CPU and could have poisoned
+- * the stack in the meantime.
+- */
+-#define READ_ONCE_TASK_STACK(task, x)                 \
+-({                                                    \
+-      unsigned long val;                              \
+-      if (task == current)                            \
+-              val = READ_ONCE(x);                     \
+-      else                                            \
+-              val = READ_ONCE_NOCHECK(x);             \
+-      val;                                            \
+-})
++unsigned long unwind_get_return_address(struct unwind_state *state)
++{
++      if (unwind_done(state))
++              return 0;
++
++      return __kernel_text_address(state->ip) ? state->ip : 0;
++}
++EXPORT_SYMBOL_GPL(unwind_get_return_address);
++
++unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
++{
++      if (unwind_done(state))
++              return NULL;
++
++      return state->regs ? &state->regs->ip : state->bp + 1;
++}
+ 
+ static void unwind_dump(struct unwind_state *state)
+ {
+@@ -66,15 +68,6 @@ static void unwind_dump(struct unwind_state *state)
+       }
+ }
+ 
+-unsigned long unwind_get_return_address(struct unwind_state *state)
+-{
+-      if (unwind_done(state))
+-              return 0;
+-
+-      return __kernel_text_address(state->ip) ? state->ip : 0;
+-}
+-EXPORT_SYMBOL_GPL(unwind_get_return_address);
+-
+ static size_t regs_size(struct pt_regs *regs)
+ {
+       /* x86_32 regs from kernel mode are two words shorter: */
+diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
+index 039f36738e49..4f0e17b90463 100644
+--- a/arch/x86/kernel/unwind_guess.c
++++ b/arch/x86/kernel/unwind_guess.c
+@@ -19,6 +19,11 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
+ }
+ EXPORT_SYMBOL_GPL(unwind_get_return_address);
+ 
++unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
++{
++      return NULL;
++}
++
+ bool unwind_next_frame(struct unwind_state *state)
+ {
+       struct stack_info *info = &state->stack_info;
+diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
+new file mode 100644
+index 000000000000..570b70d3f604
+--- /dev/null
++++ b/arch/x86/kernel/unwind_orc.c
+@@ -0,0 +1,582 @@
++#include <linux/module.h>
++#include <linux/sort.h>
++#include <asm/ptrace.h>
++#include <asm/stacktrace.h>
++#include <asm/unwind.h>
++#include <asm/orc_types.h>
++#include <asm/orc_lookup.h>
++#include <asm/sections.h>
++
++#define orc_warn(fmt, ...) \
++      printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
++
++extern int __start_orc_unwind_ip[];
++extern int __stop_orc_unwind_ip[];
++extern struct orc_entry __start_orc_unwind[];
++extern struct orc_entry __stop_orc_unwind[];
++
++static DEFINE_MUTEX(sort_mutex);
++int *cur_orc_ip_table = __start_orc_unwind_ip;
++struct orc_entry *cur_orc_table = __start_orc_unwind;
++
++unsigned int lookup_num_blocks;
++bool orc_init;
++
++static inline unsigned long orc_ip(const int *ip)
++{
++      return (unsigned long)ip + *ip;
++}
++
++static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table,
++                                  unsigned int num_entries, unsigned long ip)
++{
++      int *first = ip_table;
++      int *last = ip_table + num_entries - 1;
++      int *mid = first, *found = first;
++
++      if (!num_entries)
++              return NULL;
++
++      /*
++       * Do a binary range search to find the rightmost duplicate of a given
++       * starting address.  Some entries are section terminators which are
++       * "weak" entries for ensuring there are no gaps.  They should be
++       * ignored when they conflict with a real entry.
++       */
++      while (first <= last) {
++              mid = first + ((last - first) / 2);
++
++              if (orc_ip(mid) <= ip) {
++                      found = mid;
++                      first = mid + 1;
++              } else
++                      last = mid - 1;
++      }
++
++      return u_table + (found - ip_table);
++}
++
++#ifdef CONFIG_MODULES
++static struct orc_entry *orc_module_find(unsigned long ip)
++{
++      struct module *mod;
++
++      mod = __module_address(ip);
++      if (!mod || !mod->arch.orc_unwind || !mod->arch.orc_unwind_ip)
++              return NULL;
++      return __orc_find(mod->arch.orc_unwind_ip, mod->arch.orc_unwind,
++                        mod->arch.num_orcs, ip);
++}
++#else
++static struct orc_entry *orc_module_find(unsigned long ip)
++{
++      return NULL;
++}
++#endif
++
++static struct orc_entry *orc_find(unsigned long ip)
++{
++      if (!orc_init)
++              return NULL;
++
++      /* For non-init vmlinux addresses, use the fast lookup table: */
++      if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) {
++              unsigned int idx, start, stop;
++
++              idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
++
++              if (unlikely((idx >= lookup_num_blocks-1))) {
++                      orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%lx\n",
++                               idx, lookup_num_blocks, ip);
++                      return NULL;
++              }
++
++              start = orc_lookup[idx];
++              stop = orc_lookup[idx + 1] + 1;
++
++              if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
++                           (__start_orc_unwind + stop > __stop_orc_unwind))) {
++                      orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%lx\n",
++                               idx, lookup_num_blocks, start, stop, ip);
++                      return NULL;
++              }
++
++              return __orc_find(__start_orc_unwind_ip + start,
++                                __start_orc_unwind + start, stop - start, ip);
++      }
++
++      /* vmlinux .init slow lookup: */
++      if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext)
++              return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
++                                __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
++
++      /* Module lookup: */
++      return orc_module_find(ip);
++}
++
++static void orc_sort_swap(void *_a, void *_b, int size)
++{
++      struct orc_entry *orc_a, *orc_b;
++      struct orc_entry orc_tmp;
++      int *a = _a, *b = _b, tmp;
++      int delta = _b - _a;
++
++      /* Swap the .orc_unwind_ip entries: */
++      tmp = *a;
++      *a = *b + delta;
++      *b = tmp - delta;
++
++      /* Swap the corresponding .orc_unwind entries: */
++      orc_a = cur_orc_table + (a - cur_orc_ip_table);
++      orc_b = cur_orc_table + (b - cur_orc_ip_table);
++      orc_tmp = *orc_a;
++      *orc_a = *orc_b;
++      *orc_b = orc_tmp;
++}
++
++static int orc_sort_cmp(const void *_a, const void *_b)
++{
++      struct orc_entry *orc_a;
++      const int *a = _a, *b = _b;
++      unsigned long a_val = orc_ip(a);
++      unsigned long b_val = orc_ip(b);
++
++      if (a_val > b_val)
++              return 1;
++      if (a_val < b_val)
++              return -1;
++
++      /*
++       * The "weak" section terminator entries need to always be on the left
++       * to ensure the lookup code skips them in favor of real entries.
++       * These terminator entries exist to handle any gaps created by
++       * whitelisted .o files which didn't get objtool generation.
++       */
++      orc_a = cur_orc_table + (a - cur_orc_ip_table);
++      return orc_a->sp_reg == ORC_REG_UNDEFINED ? -1 : 1;
++}
++
++#ifdef CONFIG_MODULES
++void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size,
++                      void *_orc, size_t orc_size)
++{
++      int *orc_ip = _orc_ip;
++      struct orc_entry *orc = _orc;
++      unsigned int num_entries = orc_ip_size / sizeof(int);
++
++      WARN_ON_ONCE(orc_ip_size % sizeof(int) != 0 ||
++                   orc_size % sizeof(*orc) != 0 ||
++                   num_entries != orc_size / sizeof(*orc));
++
++      /*
++       * The 'cur_orc_*' globals allow the orc_sort_swap() callback to
++       * associate an .orc_unwind_ip table entry with its corresponding
++       * .orc_unwind entry so they can both be swapped.
++       */
++      mutex_lock(&sort_mutex);
++      cur_orc_ip_table = orc_ip;
++      cur_orc_table = orc;
++      sort(orc_ip, num_entries, sizeof(int), orc_sort_cmp, orc_sort_swap);
++      mutex_unlock(&sort_mutex);
++
++      mod->arch.orc_unwind_ip = orc_ip;
++      mod->arch.orc_unwind = orc;
++      mod->arch.num_orcs = num_entries;
++}
++#endif
++
++void __init unwind_init(void)
++{
++      size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip;
++      size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind;
++      size_t num_entries = orc_ip_size / sizeof(int);
++      struct orc_entry *orc;
++      int i;
++
++      if (!num_entries || orc_ip_size % sizeof(int) != 0 ||
++          orc_size % sizeof(struct orc_entry) != 0 ||
++          num_entries != orc_size / sizeof(struct orc_entry)) {
++              orc_warn("WARNING: Bad or missing .orc_unwind table.  Disabling unwinder.\n");
++              return;
++      }
++
++      /* Sort the .orc_unwind and .orc_unwind_ip tables: */
++      sort(__start_orc_unwind_ip, num_entries, sizeof(int), orc_sort_cmp,
++           orc_sort_swap);
++
++      /* Initialize the fast lookup table: */
++      lookup_num_blocks = orc_lookup_end - orc_lookup;
++      for (i = 0; i < lookup_num_blocks-1; i++) {
++              orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
++                               num_entries,
++                               LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i));
++              if (!orc) {
++                      orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
++                      return;
++              }
++
++              orc_lookup[i] = orc - __start_orc_unwind;
++      }
++
++      /* Initialize the ending block: */
++      orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries,
++                       LOOKUP_STOP_IP);
++      if (!orc) {
++              orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
++              return;
++      }
++      orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind;
++
++      orc_init = true;
++}
++
++unsigned long unwind_get_return_address(struct unwind_state *state)
++{
++      if (unwind_done(state))
++              return 0;
++
++      return __kernel_text_address(state->ip) ? state->ip : 0;
++}
++EXPORT_SYMBOL_GPL(unwind_get_return_address);
++
++unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
++{
++      if (unwind_done(state))
++              return NULL;
++
++      if (state->regs)
++              return &state->regs->ip;
++
++      if (state->sp)
++              return (unsigned long *)state->sp - 1;
++
++      return NULL;
++}
++
++static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
++                          size_t len)
++{
++      struct stack_info *info = &state->stack_info;
++
++      /*
++       * If the address isn't on the current stack, switch to the next one.
++       *
++       * We may have to traverse multiple stacks to deal with the possibility
++       * that info->next_sp could point to an empty stack and the address
++       * could be on a subsequent stack.
++       */
++      while (!on_stack(info, (void *)addr, len))
++              if (get_stack_info(info->next_sp, state->task, info,
++                                 &state->stack_mask))
++                      return false;
++
++      return true;
++}
++
++static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
++                          unsigned long *val)
++{
++      if (!stack_access_ok(state, addr, sizeof(long)))
++              return false;
++
++      *val = READ_ONCE_TASK_STACK(state->task, *(unsigned long *)addr);
++      return true;
++}
++
++#define REGS_SIZE (sizeof(struct pt_regs))
++#define SP_OFFSET (offsetof(struct pt_regs, sp))
++#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
++#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
++
++static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
++                           unsigned long *ip, unsigned long *sp, bool full)
++{
++      size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
++      size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
++      struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
++
++      if (IS_ENABLED(CONFIG_X86_64)) {
++              if (!stack_access_ok(state, addr, regs_size))
++                      return false;
++
++              *ip = regs->ip;
++              *sp = regs->sp;
++
++              return true;
++      }
++
++      if (!stack_access_ok(state, addr, sp_offset))
++              return false;
++
++      *ip = regs->ip;
++
++      if (user_mode(regs)) {
++              if (!stack_access_ok(state, addr + sp_offset,
++                                   REGS_SIZE - SP_OFFSET))
++                      return false;
++
++              *sp = regs->sp;
++      } else
++              *sp = (unsigned long)&regs->sp;
++
++      return true;
++}
++
++bool unwind_next_frame(struct unwind_state *state)
++{
++      unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
++      enum stack_type prev_type = state->stack_info.type;
++      struct orc_entry *orc;
++      struct pt_regs *ptregs;
++      bool indirect = false;
++
++      if (unwind_done(state))
++              return false;
++
++      /* Don't let modules unload while we're reading their ORC data. */
++      preempt_disable();
++
++      /* Have we reached the end? */
++      if (state->regs && user_mode(state->regs))
++              goto done;
++
++      /*
++       * Find the orc_entry associated with the text address.
++       *
++       * Decrement call return addresses by one so they work for sibling
++       * calls and calls to noreturn functions.
++       */
++      orc = orc_find(state->signal ? state->ip : state->ip - 1);
++      if (!orc || orc->sp_reg == ORC_REG_UNDEFINED)
++              goto done;
++      orig_ip = state->ip;
++
++      /* Find the previous frame's stack: */
++      switch (orc->sp_reg) {
++      case ORC_REG_SP:
++              sp = state->sp + orc->sp_offset;
++              break;
++
++      case ORC_REG_BP:
++              sp = state->bp + orc->sp_offset;
++              break;
++
++      case ORC_REG_SP_INDIRECT:
++              sp = state->sp + orc->sp_offset;
++              indirect = true;
++              break;
++
++      case ORC_REG_BP_INDIRECT:
++              sp = state->bp + orc->sp_offset;
++              indirect = true;
++              break;
++
++      case ORC_REG_R10:
++              if (!state->regs || !state->full_regs) {
++                      orc_warn("missing regs for base reg R10 at ip %p\n",
++                               (void *)state->ip);
++                      goto done;
++              }
++              sp = state->regs->r10;
++              break;
++
++      case ORC_REG_R13:
++              if (!state->regs || !state->full_regs) {
++                      orc_warn("missing regs for base reg R13 at ip %p\n",
++                               (void *)state->ip);
++                      goto done;
++              }
++              sp = state->regs->r13;
++              break;
++
++      case ORC_REG_DI:
++              if (!state->regs || !state->full_regs) {
++                      orc_warn("missing regs for base reg DI at ip %p\n",
++                               (void *)state->ip);
++                      goto done;
++              }
++              sp = state->regs->di;
++              break;
++
++      case ORC_REG_DX:
++              if (!state->regs || !state->full_regs) {
++                      orc_warn("missing regs for base reg DX at ip %p\n",
++                               (void *)state->ip);
++                      goto done;
++              }
++              sp = state->regs->dx;
++              break;
++
++      default:
++              orc_warn("unknown SP base reg %d for ip %p\n",
++                       orc->sp_reg, (void *)state->ip);
++              goto done;
++      }
++
++      if (indirect) {
++              if (!deref_stack_reg(state, sp, &sp))
++                      goto done;
++      }
++
++      /* Find IP, SP and possibly regs: */
++      switch (orc->type) {
++      case ORC_TYPE_CALL:
++              ip_p = sp - sizeof(long);
++
++              if (!deref_stack_reg(state, ip_p, &state->ip))
++                      goto done;
++
++              state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
++                                                state->ip, (void *)ip_p);
++
++              state->sp = sp;
++              state->regs = NULL;
++              state->signal = false;
++              break;
++
++      case ORC_TYPE_REGS:
++              if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
++                      orc_warn("can't dereference registers at %p for ip %p\n",
++                               (void *)sp, (void *)orig_ip);
++                      goto done;
++              }
++
++              state->regs = (struct pt_regs *)sp;
++              state->full_regs = true;
++              state->signal = true;
++              break;
++
++      case ORC_TYPE_REGS_IRET:
++              if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
++                      orc_warn("can't dereference iret registers at %p for ip %p\n",
++                               (void *)sp, (void *)orig_ip);
++                      goto done;
++              }
++
++              ptregs = container_of((void *)sp, struct pt_regs, ip);
++              if ((unsigned long)ptregs >= prev_sp &&
++                  on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
++                      state->regs = ptregs;
++                      state->full_regs = false;
++              } else
++                      state->regs = NULL;
++
++              state->signal = true;
++              break;
++
++      default:
++              orc_warn("unknown .orc_unwind entry type %d\n", orc->type);
++              break;
++      }
++
++      /* Find BP: */
++      switch (orc->bp_reg) {
++      case ORC_REG_UNDEFINED:
++              if (state->regs && state->full_regs)
++                      state->bp = state->regs->bp;
++              break;
++
++      case ORC_REG_PREV_SP:
++              if (!deref_stack_reg(state, sp + orc->bp_offset, &state->bp))
++                      goto done;
++              break;
++
++      case ORC_REG_BP:
++              if (!deref_stack_reg(state, state->bp + orc->bp_offset, &state->bp))
++                      goto done;
++              break;
++
++      default:
++              orc_warn("unknown BP base reg %d for ip %p\n",
++                       orc->bp_reg, (void *)orig_ip);
++              goto done;
++      }
++
++      /* Prevent a recursive loop due to bad ORC data: */
++      if (state->stack_info.type == prev_type &&
++          on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
++          state->sp <= prev_sp) {
++              orc_warn("stack going in the wrong direction? ip=%p\n",
++                       (void *)orig_ip);
++              goto done;
++      }
++
++      preempt_enable();
++      return true;
++
++done:
++      preempt_enable();
++      state->stack_info.type = STACK_TYPE_UNKNOWN;
++      return false;
++}
++EXPORT_SYMBOL_GPL(unwind_next_frame);
++
++void __unwind_start(struct unwind_state *state, struct task_struct *task,
++                  struct pt_regs *regs, unsigned long *first_frame)
++{
++      memset(state, 0, sizeof(*state));
++      state->task = task;
++
++      /*
++       * Refuse to unwind the stack of a task while it's executing on another
++       * CPU.  This check is racy, but that's ok: the unwinder has other
++       * checks to prevent it from going off the rails.
++       */
++      if (task_on_another_cpu(task))
++              goto done;
++
++      if (regs) {
++              if (user_mode(regs))
++                      goto done;
++
++              state->ip = regs->ip;
++              state->sp = kernel_stack_pointer(regs);
++              state->bp = regs->bp;
++              state->regs = regs;
++              state->full_regs = true;
++              state->signal = true;
++
++      } else if (task == current) {
++              asm volatile("lea (%%rip), %0\n\t"
++                           "mov %%rsp, %1\n\t"
++                           "mov %%rbp, %2\n\t"
++                           : "=r" (state->ip), "=r" (state->sp),
++                             "=r" (state->bp));
++
++      } else {
++              struct inactive_task_frame *frame = (void *)task->thread.sp;
++
++              state->sp = task->thread.sp;
++              state->bp = READ_ONCE_NOCHECK(frame->bp);
++              state->ip = READ_ONCE_NOCHECK(frame->ret_addr);
++      }
++
++      if (get_stack_info((unsigned long *)state->sp, state->task,
++                         &state->stack_info, &state->stack_mask))
++              return;
++
++      /*
++       * The caller can provide the address of the first frame directly
++       * (first_frame) or indirectly (regs->sp) to indicate which stack frame
++       * to start unwinding at.  Skip ahead until we reach it.
++       */
++
++      /* When starting from regs, skip the regs frame: */
++      if (regs) {
++              unwind_next_frame(state);
++              return;
++      }
++
++      /* Otherwise, skip ahead to the user-specified starting frame: */
++      while (!unwind_done(state) &&
++             (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
++                      state->sp <= (unsigned long)first_frame))
++              unwind_next_frame(state);
++
++      return;
++
++done:
++      state->stack_info.type = STACK_TYPE_UNKNOWN;
++      return;
++}
++EXPORT_SYMBOL_GPL(__unwind_start);
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 658fcf67862c..d6f45f6d1054 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -158,6 +158,7 @@ config X86
+       select HAVE_MEMBLOCK
+       select HAVE_MEMBLOCK_NODE_MAP
+       select HAVE_MIXED_BREAKPOINTS_REGS
++      select HAVE_MOD_ARCH_SPECIFIC
+       select HAVE_NMI
+       select HAVE_OPROFILE
+       select HAVE_OPTPROBES
+diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
+index 1fc519f3c49e..d5bca2ec8a74 100644
+--- a/arch/x86/Kconfig.debug
++++ b/arch/x86/Kconfig.debug
+@@ -356,4 +356,29 @@ config PUNIT_ATOM_DEBUG
+         The current power state can be read from
+         /sys/kernel/debug/punit_atom/dev_power_state
+ 
++config ORC_UNWINDER
++      bool "ORC unwinder"
++      depends on X86_64
++      select STACK_VALIDATION
++      ---help---
++        This option enables the ORC (Oops Rewind Capability) unwinder for
++        unwinding kernel stack traces.  It uses a custom data format which is
++        a simplified version of the DWARF Call Frame Information standard.
++
++        This unwinder is more accurate across interrupt entry frames than the
++        frame pointer unwinder.  It can also enable a 5-10% performance
++        improvement across the entire kernel if CONFIG_FRAME_POINTER is
++        disabled.
++
++        Enabling this option will increase the kernel's runtime memory usage
++        by roughly 2-4MB, depending on your kernel config.
++
++config FRAME_POINTER_UNWINDER
++      def_bool y
++      depends on !ORC_UNWINDER && FRAME_POINTER
++
++config GUESS_UNWINDER
++      def_bool y
++      depends on !ORC_UNWINDER && !FRAME_POINTER
++
+ endmenu
+diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
+index c8a3b61be0aa..f05f00acac89 100644
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -24,6 +24,7 @@
+ #include <asm/asm-offsets.h>
+ #include <asm/thread_info.h>
+ #include <asm/page_types.h>
++#include <asm/orc_lookup.h>
+ #include <asm/cache.h>
+ #include <asm/boot.h>
+ 
+@@ -148,6 +149,8 @@ SECTIONS
+ 
+       BUG_TABLE
+ 
++      ORC_UNWIND_TABLE
++
+       . = ALIGN(PAGE_SIZE);
+       __vvar_page = .;
+ 
+diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
+index c617b9d1d6cb..0b4d1b3880b0 100644
+--- a/lib/Kconfig.debug
++++ b/lib/Kconfig.debug
+@@ -374,6 +374,9 @@ config STACK_VALIDATION
+         pointers (if CONFIG_FRAME_POINTER is enabled).  This helps ensure
+         that runtime stack traces are more reliable.
+ 
++        This is also a prerequisite for generation of ORC unwind data, which
++        is needed for CONFIG_ORC_UNWINDER.
++
+         For more information, see
+         tools/objtool/Documentation/stack-validation.txt.
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0039-objtool-Upgrade-libelf-devel-warning-to-error-for-CO.patch b/patches/kernel/0039-objtool-Upgrade-libelf-devel-warning-to-error-for-CO.patch

deleted file mode 100644 (file)

index c2217fc..0000000
--- a/patches/kernel/0039-objtool-Upgrade-libelf-devel-warning-to-error-for-CO.patch
+++ /dev/null
@@ -1,51 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Tue, 3 Oct 2017 20:10:36 -0500
-Subject: [PATCH] objtool: Upgrade libelf-devel warning to error for
- CONFIG_ORC_UNWINDER
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-With CONFIG_ORC_UNWINDER, if the user doesn't have libelf-devel
-installed, and they don't see the make warning, their ORC unwinder will
-be silently broken.  Upgrade the warning to an error.
-
-Reported-and-tested-by: Borislav Petkov <bp@alien8.de>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/d9dfc39fb8240998820f9efb233d283a1ee96084.1507079417.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 3dd40cb320fee7c23b574ab821ce140ccd1281c9)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit c413466a72ca533ec126ebc0c5bb579ae0c96b1d)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Makefile | 6 +++++-
- 1 file changed, 5 insertions(+), 1 deletion(-)
-
-diff --git a/Makefile b/Makefile
-index 8e14a926fc94..490ce18685ea 100644
---- a/Makefile
-+++ b/Makefile
-@@ -965,7 +965,11 @@ ifdef CONFIG_STACK_VALIDATION
-   ifeq ($(has_libelf),1)
-     objtool_target := tools/objtool FORCE
-   else
--    $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
-+    ifdef CONFIG_ORC_UNWINDER
-+      $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
-+    else
-+      $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
-+    endif
-     SKIP_STACK_VALIDATION := 1
-     export SKIP_STACK_VALIDATION
-   endif
--- 
-2.14.2
-
diff --git a/patches/kernel/0039-x86-kconfig-Consolidate-unwinders-into-multiple-choi.patch b/patches/kernel/0039-x86-kconfig-Consolidate-unwinders-into-multiple-choi.patch

new file mode 100644 (file)

index 0000000..f588b6e
--- /dev/null
+++ b/patches/kernel/0039-x86-kconfig-Consolidate-unwinders-into-multiple-choi.patch
@@ -0,0 +1,171 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Tue, 25 Jul 2017 08:54:24 -0500
+Subject: [PATCH] x86/kconfig: Consolidate unwinders into multiple choice
+ selection
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+There are three mutually exclusive unwinders.  Make that more obvious by
+combining them into a multiple-choice selection:
+
+  CONFIG_FRAME_POINTER_UNWINDER
+  CONFIG_ORC_UNWINDER
+  CONFIG_GUESS_UNWINDER (if CONFIG_EXPERT=y)
+
+Frame pointers are still the default (for now).
+
+The old CONFIG_FRAME_POINTER option is still used in some
+arch-independent places, so keep it around, but make it
+invisible to the user on x86 - it's now selected by
+CONFIG_FRAME_POINTER_UNWINDER=y.
+
+Suggested-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: live-patching@vger.kernel.org
+Link: http://lkml.kernel.org/r/20170725135424.zukjmgpz3plf5pmt@treble
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 81d387190039c14edac8de2b3ec789beb899afd9)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 26ddacc1e6333555e4a6bd63c4c935b323509f92)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/unwind.h |  4 ++--
+ arch/x86/Kconfig              |  3 +--
+ arch/x86/Kconfig.debug        | 45 +++++++++++++++++++++++++++++++++++++------
+ arch/x86/configs/tiny.config  |  2 ++
+ 4 files changed, 44 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
+index 25b8d31a007d..e9f793e2df7a 100644
+--- a/arch/x86/include/asm/unwind.h
++++ b/arch/x86/include/asm/unwind.h
+@@ -16,7 +16,7 @@ struct unwind_state {
+       bool signal, full_regs;
+       unsigned long sp, bp, ip;
+       struct pt_regs *regs;
+-#elif defined(CONFIG_FRAME_POINTER)
++#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
+       bool got_irq;
+       unsigned long *bp, *orig_sp, ip;
+       struct pt_regs *regs;
+@@ -50,7 +50,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
+       __unwind_start(state, task, regs, first_frame);
+ }
+ 
+-#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER)
++#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
+ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+ {
+       if (unwind_done(state))
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index d6f45f6d1054..3a0b8cb57caf 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -73,7 +73,6 @@ config X86
+       select ARCH_USE_QUEUED_RWLOCKS
+       select ARCH_USE_QUEUED_SPINLOCKS
+       select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+-      select ARCH_WANT_FRAME_POINTERS
+       select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+       select ARCH_WANTS_THP_SWAP              if X86_64
+       select BUILDTIME_EXTABLE_SORT
+@@ -169,7 +168,7 @@ config X86
+       select HAVE_PERF_REGS
+       select HAVE_PERF_USER_STACK_DUMP
+       select HAVE_REGS_AND_STACK_ACCESS_API
+-      select HAVE_RELIABLE_STACKTRACE         if X86_64 && FRAME_POINTER && STACK_VALIDATION
++      select HAVE_RELIABLE_STACKTRACE         if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
+       select HAVE_STACK_VALIDATION            if X86_64
+       select HAVE_SYSCALL_TRACEPOINTS
+       select HAVE_UNSTABLE_SCHED_CLOCK
+diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
+index d5bca2ec8a74..c441b5d65ec8 100644
+--- a/arch/x86/Kconfig.debug
++++ b/arch/x86/Kconfig.debug
+@@ -356,6 +356,29 @@ config PUNIT_ATOM_DEBUG
+         The current power state can be read from
+         /sys/kernel/debug/punit_atom/dev_power_state
+ 
++choice
++      prompt "Choose kernel unwinder"
++      default FRAME_POINTER_UNWINDER
++      ---help---
++        This determines which method will be used for unwinding kernel stack
++        traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
++        livepatch, lockdep, and more.
++
++config FRAME_POINTER_UNWINDER
++      bool "Frame pointer unwinder"
++      select FRAME_POINTER
++      ---help---
++        This option enables the frame pointer unwinder for unwinding kernel
++        stack traces.
++
++        The unwinder itself is fast and it uses less RAM than the ORC
++        unwinder, but the kernel text size will grow by ~3% and the kernel's
++        overall performance will degrade by roughly 5-10%.
++
++        This option is recommended if you want to use the livepatch
++        consistency model, as this is currently the only way to get a
++        reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
++
+ config ORC_UNWINDER
+       bool "ORC unwinder"
+       depends on X86_64
+@@ -373,12 +396,22 @@ config ORC_UNWINDER
+         Enabling this option will increase the kernel's runtime memory usage
+         by roughly 2-4MB, depending on your kernel config.
+ 
+-config FRAME_POINTER_UNWINDER
+-      def_bool y
+-      depends on !ORC_UNWINDER && FRAME_POINTER
+-
+ config GUESS_UNWINDER
+-      def_bool y
+-      depends on !ORC_UNWINDER && !FRAME_POINTER
++      bool "Guess unwinder"
++      depends on EXPERT
++      ---help---
++        This option enables the "guess" unwinder for unwinding kernel stack
++        traces.  It scans the stack and reports every kernel text address it
++        finds.  Some of the addresses it reports may be incorrect.
++
++        While this option often produces false positives, it can still be
++        useful in many cases.  Unlike the other unwinders, it has no runtime
++        overhead.
++
++endchoice
++
++config FRAME_POINTER
++      depends on !ORC_UNWINDER && !GUESS_UNWINDER
++      bool
+ 
+ endmenu
+diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
+index 4b429df40d7a..550cd5012b73 100644
+--- a/arch/x86/configs/tiny.config
++++ b/arch/x86/configs/tiny.config
+@@ -1,3 +1,5 @@
+ CONFIG_NOHIGHMEM=y
+ # CONFIG_HIGHMEM4G is not set
+ # CONFIG_HIGHMEM64G is not set
++CONFIG_GUESS_UNWINDER=y
++# CONFIG_FRAME_POINTER_UNWINDER is not set
+-- 
+2.14.2
+
diff --git a/patches/kernel/0040-objtool-Upgrade-libelf-devel-warning-to-error-for-CO.patch b/patches/kernel/0040-objtool-Upgrade-libelf-devel-warning-to-error-for-CO.patch

new file mode 100644 (file)

index 0000000..c2217fc
--- /dev/null
+++ b/patches/kernel/0040-objtool-Upgrade-libelf-devel-warning-to-error-for-CO.patch
@@ -0,0 +1,51 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Tue, 3 Oct 2017 20:10:36 -0500
+Subject: [PATCH] objtool: Upgrade libelf-devel warning to error for
+ CONFIG_ORC_UNWINDER
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+With CONFIG_ORC_UNWINDER, if the user doesn't have libelf-devel
+installed, and they don't see the make warning, their ORC unwinder will
+be silently broken.  Upgrade the warning to an error.
+
+Reported-and-tested-by: Borislav Petkov <bp@alien8.de>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/d9dfc39fb8240998820f9efb233d283a1ee96084.1507079417.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 3dd40cb320fee7c23b574ab821ce140ccd1281c9)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit c413466a72ca533ec126ebc0c5bb579ae0c96b1d)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Makefile | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/Makefile b/Makefile
+index 8e14a926fc94..490ce18685ea 100644
+--- a/Makefile
++++ b/Makefile
+@@ -965,7 +965,11 @@ ifdef CONFIG_STACK_VALIDATION
+   ifeq ($(has_libelf),1)
+     objtool_target := tools/objtool FORCE
+   else
+-    $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
++    ifdef CONFIG_ORC_UNWINDER
++      $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
++    else
++      $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
++    endif
+     SKIP_STACK_VALIDATION := 1
+     export SKIP_STACK_VALIDATION
+   endif
+-- 
+2.14.2
+
diff --git a/patches/kernel/0040-x86-ldt-64-Refresh-DS-and-ES-when-modify_ldt-changes.patch b/patches/kernel/0040-x86-ldt-64-Refresh-DS-and-ES-when-modify_ldt-changes.patch

deleted file mode 100644 (file)

index 91ade6a..0000000
--- a/patches/kernel/0040-x86-ldt-64-Refresh-DS-and-ES-when-modify_ldt-changes.patch
+++ /dev/null
@@ -1,82 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Wed, 26 Jul 2017 07:16:30 -0700
-Subject: [PATCH] x86/ldt/64: Refresh DS and ES when modify_ldt changes an
- entry
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-On x86_32, modify_ldt() implicitly refreshes the cached DS and ES
-segments because they are refreshed on return to usermode.
-
-On x86_64, they're not refreshed on return to usermode.  To improve
-determinism and match x86_32's behavior, refresh them when we update
-the LDT.
-
-This avoids a situation in which the DS points to a descriptor that is
-changed but the old cached segment persists until the next reschedule.
-If this happens, then the user-visible state will change
-nondeterministically some time after modify_ldt() returns, which is
-unfortunate.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Chang Seok <chang.seok.bae@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit a632375764aa25c97b78beb56c71b0ba59d1cf83)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 295cb0b06150958ec84ee4b8844ef7e389e22c4e)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/ldt.c | 21 +++++++++++++++++++++
- 1 file changed, 21 insertions(+)
-
-diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
-index a870910c8565..f0e64db18ac8 100644
---- a/arch/x86/kernel/ldt.c
-+++ b/arch/x86/kernel/ldt.c
-@@ -21,6 +21,25 @@
- #include <asm/mmu_context.h>
- #include <asm/syscalls.h>
- 
-+static void refresh_ldt_segments(void)
-+{
-+#ifdef CONFIG_X86_64
-+      unsigned short sel;
-+
-+      /*
-+       * Make sure that the cached DS and ES descriptors match the updated
-+       * LDT.
-+       */
-+      savesegment(ds, sel);
-+      if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
-+              loadsegment(ds, sel);
-+
-+      savesegment(es, sel);
-+      if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
-+              loadsegment(es, sel);
-+#endif
-+}
-+
- /* context.lock is held for us, so we don't need any locking. */
- static void flush_ldt(void *__mm)
- {
-@@ -32,6 +51,8 @@ static void flush_ldt(void *__mm)
- 
-       pc = &mm->context;
-       set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
-+
-+      refresh_ldt_segments();
- }
- 
- /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
--- 
-2.14.2
-
diff --git a/patches/kernel/0041-x86-ldt-64-Refresh-DS-and-ES-when-modify_ldt-changes.patch b/patches/kernel/0041-x86-ldt-64-Refresh-DS-and-ES-when-modify_ldt-changes.patch

new file mode 100644 (file)

index 0000000..91ade6a
--- /dev/null
+++ b/patches/kernel/0041-x86-ldt-64-Refresh-DS-and-ES-when-modify_ldt-changes.patch
@@ -0,0 +1,82 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Wed, 26 Jul 2017 07:16:30 -0700
+Subject: [PATCH] x86/ldt/64: Refresh DS and ES when modify_ldt changes an
+ entry
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+On x86_32, modify_ldt() implicitly refreshes the cached DS and ES
+segments because they are refreshed on return to usermode.
+
+On x86_64, they're not refreshed on return to usermode.  To improve
+determinism and match x86_32's behavior, refresh them when we update
+the LDT.
+
+This avoids a situation in which the DS points to a descriptor that is
+changed but the old cached segment persists until the next reschedule.
+If this happens, then the user-visible state will change
+nondeterministically some time after modify_ldt() returns, which is
+unfortunate.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Chang Seok <chang.seok.bae@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit a632375764aa25c97b78beb56c71b0ba59d1cf83)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 295cb0b06150958ec84ee4b8844ef7e389e22c4e)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/ldt.c | 21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
+index a870910c8565..f0e64db18ac8 100644
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -21,6 +21,25 @@
+ #include <asm/mmu_context.h>
+ #include <asm/syscalls.h>
+ 
++static void refresh_ldt_segments(void)
++{
++#ifdef CONFIG_X86_64
++      unsigned short sel;
++
++      /*
++       * Make sure that the cached DS and ES descriptors match the updated
++       * LDT.
++       */
++      savesegment(ds, sel);
++      if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
++              loadsegment(ds, sel);
++
++      savesegment(es, sel);
++      if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
++              loadsegment(es, sel);
++#endif
++}
++
+ /* context.lock is held for us, so we don't need any locking. */
+ static void flush_ldt(void *__mm)
+ {
+@@ -32,6 +51,8 @@ static void flush_ldt(void *__mm)
+ 
+       pc = &mm->context;
+       set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
++
++      refresh_ldt_segments();
+ }
+ 
+ /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0041-x86-mm-Give-each-mm-TLB-flush-generation-a-unique-ID.patch b/patches/kernel/0041-x86-mm-Give-each-mm-TLB-flush-generation-a-unique-ID.patch

deleted file mode 100644 (file)

index d260739..0000000
--- a/patches/kernel/0041-x86-mm-Give-each-mm-TLB-flush-generation-a-unique-ID.patch
+++ /dev/null
@@ -1,182 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 29 Jun 2017 08:53:15 -0700
-Subject: [PATCH] x86/mm: Give each mm TLB flush generation a unique ID
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This adds two new variables to mmu_context_t: ctx_id and tlb_gen.
-ctx_id uniquely identifies the mm_struct and will never be reused.
-For a given mm_struct (and hence ctx_id), tlb_gen is a monotonic
-count of the number of times that a TLB flush has been requested.
-The pair (ctx_id, tlb_gen) can be used as an identifier for TLB
-flush actions and will be used in subsequent patches to reliably
-determine whether all needed TLB flushes have occurred on a given
-CPU.
-
-This patch is split out for ease of review.  By itself, it has no
-real effect other than creating and updating the new variables.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Arjan van de Ven <arjan@linux.intel.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: linux-mm@kvack.org
-Link: http://lkml.kernel.org/r/413a91c24dab3ed0caa5f4e4d017d87b0857f920.1498751203.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit f39681ed0f48498b80455095376f11535feea332)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit e566a0dfbb2a5f7ea90dd66ce384740372739e14)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/mmu.h         | 25 +++++++++++++++++++++++--
- arch/x86/include/asm/mmu_context.h |  6 ++++++
- arch/x86/include/asm/tlbflush.h    | 18 ++++++++++++++++++
- arch/x86/mm/tlb.c                  |  6 ++++--
- 4 files changed, 51 insertions(+), 4 deletions(-)
-
-diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
-index 79b647a7ebd0..bb8c597c2248 100644
---- a/arch/x86/include/asm/mmu.h
-+++ b/arch/x86/include/asm/mmu.h
-@@ -3,12 +3,28 @@
- 
- #include <linux/spinlock.h>
- #include <linux/mutex.h>
-+#include <linux/atomic.h>
- 
- /*
-- * The x86 doesn't have a mmu context, but
-- * we put the segment information here.
-+ * x86 has arch-specific MMU state beyond what lives in mm_struct.
-  */
- typedef struct {
-+      /*
-+       * ctx_id uniquely identifies this mm_struct.  A ctx_id will never
-+       * be reused, and zero is not a valid ctx_id.
-+       */
-+      u64 ctx_id;
-+
-+      /*
-+       * Any code that needs to do any sort of TLB flushing for this
-+       * mm will first make its changes to the page tables, then
-+       * increment tlb_gen, then flush.  This lets the low-level
-+       * flushing code keep track of what needs flushing.
-+       *
-+       * This is not used on Xen PV.
-+       */
-+      atomic64_t tlb_gen;
-+
- #ifdef CONFIG_MODIFY_LDT_SYSCALL
-       struct ldt_struct *ldt;
- #endif
-@@ -37,6 +53,11 @@ typedef struct {
- #endif
- } mm_context_t;
- 
-+#define INIT_MM_CONTEXT(mm)                                           \
-+      .context = {                                                    \
-+              .ctx_id = 1,                                            \
-+      }
-+
- void leave_mm(int cpu);
- 
- #endif /* _ASM_X86_MMU_H */
-diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
-index 7a234be7e298..6c05679c715b 100644
---- a/arch/x86/include/asm/mmu_context.h
-+++ b/arch/x86/include/asm/mmu_context.h
-@@ -12,6 +12,9 @@
- #include <asm/tlbflush.h>
- #include <asm/paravirt.h>
- #include <asm/mpx.h>
-+
-+extern atomic64_t last_mm_ctx_id;
-+
- #ifndef CONFIG_PARAVIRT
- static inline void paravirt_activate_mm(struct mm_struct *prev,
-                                       struct mm_struct *next)
-@@ -132,6 +135,9 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
- static inline int init_new_context(struct task_struct *tsk,
-                                  struct mm_struct *mm)
- {
-+      mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
-+      atomic64_set(&mm->context.tlb_gen, 0);
-+
-       #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-       if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
-               /* pkey 0 is the default and always allocated */
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index 2b3d68093235..f1f2e73b7b77 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -57,6 +57,23 @@ static inline void invpcid_flush_all_nonglobals(void)
-       __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
- }
- 
-+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
-+{
-+      u64 new_tlb_gen;
-+
-+      /*
-+       * Bump the generation count.  This also serves as a full barrier
-+       * that synchronizes with switch_mm(): callers are required to order
-+       * their read of mm_cpumask after their writes to the paging
-+       * structures.
-+       */
-+      smp_mb__before_atomic();
-+      new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
-+      smp_mb__after_atomic();
-+
-+      return new_tlb_gen;
-+}
-+
- #ifdef CONFIG_PARAVIRT
- #include <asm/paravirt.h>
- #else
-@@ -270,6 +287,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
- static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
-                                       struct mm_struct *mm)
- {
-+      inc_mm_tlb_gen(mm);
-       cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
- }
- 
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index 014d07a80053..14f4f8f66aa8 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -28,6 +28,8 @@
-  *    Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
-  */
- 
-+atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
-+
- void leave_mm(int cpu)
- {
-       struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
-@@ -250,8 +252,8 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
- 
-       cpu = get_cpu();
- 
--      /* Synchronize with switch_mm. */
--      smp_mb();
-+      /* This is also a barrier that synchronizes with switch_mm(). */
-+      inc_mm_tlb_gen(mm);
- 
-       /* Should we flush just the requested range? */
-       if ((end != TLB_FLUSH_ALL) &&
--- 
-2.14.2
-
diff --git a/patches/kernel/0042-x86-mm-Give-each-mm-TLB-flush-generation-a-unique-ID.patch b/patches/kernel/0042-x86-mm-Give-each-mm-TLB-flush-generation-a-unique-ID.patch

new file mode 100644 (file)

index 0000000..d260739
--- /dev/null
+++ b/patches/kernel/0042-x86-mm-Give-each-mm-TLB-flush-generation-a-unique-ID.patch
@@ -0,0 +1,182 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 29 Jun 2017 08:53:15 -0700
+Subject: [PATCH] x86/mm: Give each mm TLB flush generation a unique ID
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This adds two new variables to mmu_context_t: ctx_id and tlb_gen.
+ctx_id uniquely identifies the mm_struct and will never be reused.
+For a given mm_struct (and hence ctx_id), tlb_gen is a monotonic
+count of the number of times that a TLB flush has been requested.
+The pair (ctx_id, tlb_gen) can be used as an identifier for TLB
+flush actions and will be used in subsequent patches to reliably
+determine whether all needed TLB flushes have occurred on a given
+CPU.
+
+This patch is split out for ease of review.  By itself, it has no
+real effect other than creating and updating the new variables.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/413a91c24dab3ed0caa5f4e4d017d87b0857f920.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit f39681ed0f48498b80455095376f11535feea332)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit e566a0dfbb2a5f7ea90dd66ce384740372739e14)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/mmu.h         | 25 +++++++++++++++++++++++--
+ arch/x86/include/asm/mmu_context.h |  6 ++++++
+ arch/x86/include/asm/tlbflush.h    | 18 ++++++++++++++++++
+ arch/x86/mm/tlb.c                  |  6 ++++--
+ 4 files changed, 51 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
+index 79b647a7ebd0..bb8c597c2248 100644
+--- a/arch/x86/include/asm/mmu.h
++++ b/arch/x86/include/asm/mmu.h
+@@ -3,12 +3,28 @@
+ 
+ #include <linux/spinlock.h>
+ #include <linux/mutex.h>
++#include <linux/atomic.h>
+ 
+ /*
+- * The x86 doesn't have a mmu context, but
+- * we put the segment information here.
++ * x86 has arch-specific MMU state beyond what lives in mm_struct.
+  */
+ typedef struct {
++      /*
++       * ctx_id uniquely identifies this mm_struct.  A ctx_id will never
++       * be reused, and zero is not a valid ctx_id.
++       */
++      u64 ctx_id;
++
++      /*
++       * Any code that needs to do any sort of TLB flushing for this
++       * mm will first make its changes to the page tables, then
++       * increment tlb_gen, then flush.  This lets the low-level
++       * flushing code keep track of what needs flushing.
++       *
++       * This is not used on Xen PV.
++       */
++      atomic64_t tlb_gen;
++
+ #ifdef CONFIG_MODIFY_LDT_SYSCALL
+       struct ldt_struct *ldt;
+ #endif
+@@ -37,6 +53,11 @@ typedef struct {
+ #endif
+ } mm_context_t;
+ 
++#define INIT_MM_CONTEXT(mm)                                           \
++      .context = {                                                    \
++              .ctx_id = 1,                                            \
++      }
++
+ void leave_mm(int cpu);
+ 
+ #endif /* _ASM_X86_MMU_H */
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index 7a234be7e298..6c05679c715b 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -12,6 +12,9 @@
+ #include <asm/tlbflush.h>
+ #include <asm/paravirt.h>
+ #include <asm/mpx.h>
++
++extern atomic64_t last_mm_ctx_id;
++
+ #ifndef CONFIG_PARAVIRT
+ static inline void paravirt_activate_mm(struct mm_struct *prev,
+                                       struct mm_struct *next)
+@@ -132,6 +135,9 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+ static inline int init_new_context(struct task_struct *tsk,
+                                  struct mm_struct *mm)
+ {
++      mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
++      atomic64_set(&mm->context.tlb_gen, 0);
++
+       #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+       if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
+               /* pkey 0 is the default and always allocated */
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 2b3d68093235..f1f2e73b7b77 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -57,6 +57,23 @@ static inline void invpcid_flush_all_nonglobals(void)
+       __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+ }
+ 
++static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
++{
++      u64 new_tlb_gen;
++
++      /*
++       * Bump the generation count.  This also serves as a full barrier
++       * that synchronizes with switch_mm(): callers are required to order
++       * their read of mm_cpumask after their writes to the paging
++       * structures.
++       */
++      smp_mb__before_atomic();
++      new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
++      smp_mb__after_atomic();
++
++      return new_tlb_gen;
++}
++
+ #ifdef CONFIG_PARAVIRT
+ #include <asm/paravirt.h>
+ #else
+@@ -270,6 +287,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
+ static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
+                                       struct mm_struct *mm)
+ {
++      inc_mm_tlb_gen(mm);
+       cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ }
+ 
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 014d07a80053..14f4f8f66aa8 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -28,6 +28,8 @@
+  *    Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
+  */
+ 
++atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
++
+ void leave_mm(int cpu)
+ {
+       struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+@@ -250,8 +252,8 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ 
+       cpu = get_cpu();
+ 
+-      /* Synchronize with switch_mm. */
+-      smp_mb();
++      /* This is also a barrier that synchronizes with switch_mm(). */
++      inc_mm_tlb_gen(mm);
+ 
+       /* Should we flush just the requested range? */
+       if ((end != TLB_FLUSH_ALL) &&
+-- 
+2.14.2
+
diff --git a/patches/kernel/0042-x86-mm-Track-the-TLB-s-tlb_gen-and-update-the-flushi.patch b/patches/kernel/0042-x86-mm-Track-the-TLB-s-tlb_gen-and-update-the-flushi.patch

deleted file mode 100644 (file)

index 2630f26..0000000
--- a/patches/kernel/0042-x86-mm-Track-the-TLB-s-tlb_gen-and-update-the-flushi.patch
+++ /dev/null
@@ -1,279 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 29 Jun 2017 08:53:16 -0700
-Subject: [PATCH] x86/mm: Track the TLB's tlb_gen and update the flushing
- algorithm
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-There are two kernel features that would benefit from tracking
-how up-to-date each CPU's TLB is in the case where IPIs aren't keeping
-it up to date in real time:
-
- - Lazy mm switching currently works by switching to init_mm when
-   it would otherwise flush.  This is wasteful: there isn't fundamentally
-   any need to update CR3 at all when going lazy or when returning from
-   lazy mode, nor is there any need to receive flush IPIs at all.  Instead,
-   we should just stop trying to keep the TLB coherent when we go lazy and,
-   when unlazying, check whether we missed any flushes.
-
- - PCID will let us keep recent user contexts alive in the TLB.  If we
-   start doing this, we need a way to decide whether those contexts are
-   up to date.
-
-On some paravirt systems, remote TLBs can be flushed without IPIs.
-This won't update the target CPUs' tlb_gens, which may cause
-unnecessary local flushes later on.  We can address this if it becomes
-a problem by carefully updating the target CPU's tlb_gen directly.
-
-By itself, this patch is a very minor optimization that avoids
-unnecessary flushes when multiple TLB flushes targetting the same CPU
-race.  The complexity in this patch would not be worth it on its own,
-but it will enable improved lazy TLB tracking and PCID.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Arjan van de Ven <arjan@linux.intel.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: linux-mm@kvack.org
-Link: http://lkml.kernel.org/r/1210fb244bc9cbe7677f7f0b72db4d359675f24b.1498751203.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit b0579ade7cd82391360e959cc844e50a160e8a96)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d34881c25f3c70228ed792fd62881185a25c4422)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/tlbflush.h |  43 +++++++++++++++--
- arch/x86/mm/tlb.c               | 102 +++++++++++++++++++++++++++++++++++++---
- 2 files changed, 135 insertions(+), 10 deletions(-)
-
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index f1f2e73b7b77..3a167c214560 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -82,6 +82,11 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
- #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
- #endif
- 
-+struct tlb_context {
-+      u64 ctx_id;
-+      u64 tlb_gen;
-+};
-+
- struct tlb_state {
-       /*
-        * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
-@@ -97,6 +102,21 @@ struct tlb_state {
-        * disabling interrupts when modifying either one.
-        */
-       unsigned long cr4;
-+
-+      /*
-+       * This is a list of all contexts that might exist in the TLB.
-+       * Since we don't yet use PCID, there is only one context.
-+       *
-+       * For each context, ctx_id indicates which mm the TLB's user
-+       * entries came from.  As an invariant, the TLB will never
-+       * contain entries that are out-of-date as when that mm reached
-+       * the tlb_gen in the list.
-+       *
-+       * To be clear, this means that it's legal for the TLB code to
-+       * flush the TLB without updating tlb_gen.  This can happen
-+       * (for now, at least) due to paravirt remote flushes.
-+       */
-+      struct tlb_context ctxs[1];
- };
- DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
- 
-@@ -256,9 +276,26 @@ static inline void __flush_tlb_one(unsigned long addr)
-  * and page-granular flushes are available only on i486 and up.
-  */
- struct flush_tlb_info {
--      struct mm_struct *mm;
--      unsigned long start;
--      unsigned long end;
-+      /*
-+       * We support several kinds of flushes.
-+       *
-+       * - Fully flush a single mm.  .mm will be set, .end will be
-+       *   TLB_FLUSH_ALL, and .new_tlb_gen will be the tlb_gen to
-+       *   which the IPI sender is trying to catch us up.
-+       *
-+       * - Partially flush a single mm.  .mm will be set, .start and
-+       *   .end will indicate the range, and .new_tlb_gen will be set
-+       *   such that the changes between generation .new_tlb_gen-1 and
-+       *   .new_tlb_gen are entirely contained in the indicated range.
-+       *
-+       * - Fully flush all mms whose tlb_gens have been updated.  .mm
-+       *   will be NULL, .end will be TLB_FLUSH_ALL, and .new_tlb_gen
-+       *   will be zero.
-+       */
-+      struct mm_struct        *mm;
-+      unsigned long           start;
-+      unsigned long           end;
-+      u64                     new_tlb_gen;
- };
- 
- #define local_flush_tlb() __flush_tlb()
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index 14f4f8f66aa8..4e5a5ddb9e4d 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -105,6 +105,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-       }
- 
-       this_cpu_write(cpu_tlbstate.loaded_mm, next);
-+      this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
-+      this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, atomic64_read(&next->context.tlb_gen));
- 
-       WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
-       cpumask_set_cpu(cpu, mm_cpumask(next));
-@@ -155,25 +157,102 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-       switch_ldt(real_prev, next);
- }
- 
-+/*
-+ * flush_tlb_func_common()'s memory ordering requirement is that any
-+ * TLB fills that happen after we flush the TLB are ordered after we
-+ * read active_mm's tlb_gen.  We don't need any explicit barriers
-+ * because all x86 flush operations are serializing and the
-+ * atomic64_read operation won't be reordered by the compiler.
-+ */
- static void flush_tlb_func_common(const struct flush_tlb_info *f,
-                                 bool local, enum tlb_flush_reason reason)
- {
-+      /*
-+       * We have three different tlb_gen values in here.  They are:
-+       *
-+       * - mm_tlb_gen:     the latest generation.
-+       * - local_tlb_gen:  the generation that this CPU has already caught
-+       *                   up to.
-+       * - f->new_tlb_gen: the generation that the requester of the flush
-+       *                   wants us to catch up to.
-+       */
-+      struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
-+      u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
-+      u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen);
-+
-       /* This code cannot presently handle being reentered. */
-       VM_WARN_ON(!irqs_disabled());
- 
-+      VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
-+                 loaded_mm->context.ctx_id);
-+
-       if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
-+              /*
-+               * leave_mm() is adequate to handle any type of flush, and
-+               * we would prefer not to receive further IPIs.  leave_mm()
-+               * clears this CPU's bit in mm_cpumask().
-+               */
-               leave_mm(smp_processor_id());
-               return;
-       }
- 
--      if (f->end == TLB_FLUSH_ALL) {
--              local_flush_tlb();
--              if (local)
--                      count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
--              trace_tlb_flush(reason, TLB_FLUSH_ALL);
--      } else {
-+      if (unlikely(local_tlb_gen == mm_tlb_gen)) {
-+              /*
-+               * There's nothing to do: we're already up to date.  This can
-+               * happen if two concurrent flushes happen -- the first flush to
-+               * be handled can catch us all the way up, leaving no work for
-+               * the second flush.
-+               */
-+              return;
-+      }
-+
-+      WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
-+      WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);
-+
-+      /*
-+       * If we get to this point, we know that our TLB is out of date.
-+       * This does not strictly imply that we need to flush (it's
-+       * possible that f->new_tlb_gen <= local_tlb_gen), but we're
-+       * going to need to flush in the very near future, so we might
-+       * as well get it over with.
-+       *
-+       * The only question is whether to do a full or partial flush.
-+       *
-+       * We do a partial flush if requested and two extra conditions
-+       * are met:
-+       *
-+       * 1. f->new_tlb_gen == local_tlb_gen + 1.  We have an invariant that
-+       *    we've always done all needed flushes to catch up to
-+       *    local_tlb_gen.  If, for example, local_tlb_gen == 2 and
-+       *    f->new_tlb_gen == 3, then we know that the flush needed to bring
-+       *    us up to date for tlb_gen 3 is the partial flush we're
-+       *    processing.
-+       *
-+       *    As an example of why this check is needed, suppose that there
-+       *    are two concurrent flushes.  The first is a full flush that
-+       *    changes context.tlb_gen from 1 to 2.  The second is a partial
-+       *    flush that changes context.tlb_gen from 2 to 3.  If they get
-+       *    processed on this CPU in reverse order, we'll see
-+       *     local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
-+       *    If we were to use __flush_tlb_single() and set local_tlb_gen to
-+       *    3, we'd be break the invariant: we'd update local_tlb_gen above
-+       *    1 without the full flush that's needed for tlb_gen 2.
-+       *
-+       * 2. f->new_tlb_gen == mm_tlb_gen.  This is purely an optimiation.
-+       *    Partial TLB flushes are not all that much cheaper than full TLB
-+       *    flushes, so it seems unlikely that it would be a performance win
-+       *    to do a partial flush if that won't bring our TLB fully up to
-+       *    date.  By doing a full flush instead, we can increase
-+       *    local_tlb_gen all the way to mm_tlb_gen and we can probably
-+       *    avoid another flush in the very near future.
-+       */
-+      if (f->end != TLB_FLUSH_ALL &&
-+          f->new_tlb_gen == local_tlb_gen + 1 &&
-+          f->new_tlb_gen == mm_tlb_gen) {
-+              /* Partial flush */
-               unsigned long addr;
-               unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
-+
-               addr = f->start;
-               while (addr < f->end) {
-                       __flush_tlb_single(addr);
-@@ -182,7 +261,16 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
-               if (local)
-                       count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
-               trace_tlb_flush(reason, nr_pages);
-+      } else {
-+              /* Full flush. */
-+              local_flush_tlb();
-+              if (local)
-+                      count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-+              trace_tlb_flush(reason, TLB_FLUSH_ALL);
-       }
-+
-+      /* Both paths above update our state to mm_tlb_gen. */
-+      this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, mm_tlb_gen);
- }
- 
- static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
-@@ -253,7 +341,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
-       cpu = get_cpu();
- 
-       /* This is also a barrier that synchronizes with switch_mm(). */
--      inc_mm_tlb_gen(mm);
-+      info.new_tlb_gen = inc_mm_tlb_gen(mm);
- 
-       /* Should we flush just the requested range? */
-       if ((end != TLB_FLUSH_ALL) &&
--- 
-2.14.2
-
diff --git a/patches/kernel/0043-x86-mm-Rework-lazy-TLB-mode-and-TLB-freshness-tracki.patch b/patches/kernel/0043-x86-mm-Rework-lazy-TLB-mode-and-TLB-freshness-tracki.patch

deleted file mode 100644 (file)

index 70f93ef..0000000
--- a/patches/kernel/0043-x86-mm-Rework-lazy-TLB-mode-and-TLB-freshness-tracki.patch
+++ /dev/null
@@ -1,453 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 29 Jun 2017 08:53:17 -0700
-Subject: [PATCH] x86/mm: Rework lazy TLB mode and TLB freshness tracking
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-x86's lazy TLB mode used to be fairly weak -- it would switch to
-init_mm the first time it tried to flush a lazy TLB.  This meant an
-unnecessary CR3 write and, if the flush was remote, an unnecessary
-IPI.
-
-Rewrite it entirely.  When we enter lazy mode, we simply remove the
-CPU from mm_cpumask.  This means that we need a way to figure out
-whether we've missed a flush when we switch back out of lazy mode.
-I use the tlb_gen machinery to track whether a context is up to
-date.
-
-Note to reviewers: this patch, my itself, looks a bit odd.  I'm
-using an array of length 1 containing (ctx_id, tlb_gen) rather than
-just storing tlb_gen, and making it at array isn't necessary yet.
-I'm doing this because the next few patches add PCID support, and,
-with PCID, we need ctx_id, and the array will end up with a length
-greater than 1.  Making it an array now means that there will be
-less churn and therefore less stress on your eyeballs.
-
-NB: This is dubious but, AFAICT, still correct on Xen and UV.
-xen_exit_mmap() uses mm_cpumask() for nefarious purposes and this
-patch changes the way that mm_cpumask() works.  This should be okay,
-since Xen *also* iterates all online CPUs to find all the CPUs it
-needs to twiddle.
-
-The UV tlbflush code is rather dated and should be changed.
-
-Here are some benchmark results, done on a Skylake laptop at 2.3 GHz
-(turbo off, intel_pstate requesting max performance) under KVM with
-the guest using idle=poll (to avoid artifacts when bouncing between
-CPUs).  I haven't done any real statistics here -- I just ran them
-in a loop and picked the fastest results that didn't look like
-outliers.  Unpatched means commit a4eb8b993554, so all the
-bookkeeping overhead is gone.
-
-MADV_DONTNEED; touch the page; switch CPUs using sched_setaffinity.  In
-an unpatched kernel, MADV_DONTNEED will send an IPI to the previous CPU.
-This is intended to be a nearly worst-case test.
-
-  patched:         13.4µs
-  unpatched:       21.6µs
-
-Vitaly's pthread_mmap microbenchmark with 8 threads (on four cores),
-nrounds = 100, 256M data
-
-  patched:         1.1 seconds or so
-  unpatched:       1.9 seconds or so
-
-The sleepup on Vitaly's test appearss to be because it spends a lot
-of time blocked on mmap_sem, and this patch avoids sending IPIs to
-blocked CPUs.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andrew Banman <abanman@sgi.com>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Arjan van de Ven <arjan@linux.intel.com>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dimitri Sivanich <sivanich@sgi.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Mike Travis <travis@sgi.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: linux-mm@kvack.org
-Link: http://lkml.kernel.org/r/ddf2c92962339f4ba39d8fc41b853936ec0b44f1.1498751203.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 94b1b03b519b81c494900cb112aa00ed205cc2d9)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit b381b7ae452f2bc6384507a897247be7c93a71cc)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/mmu_context.h |   6 +-
- arch/x86/include/asm/tlbflush.h    |   4 -
- arch/x86/mm/init.c                 |   1 -
- arch/x86/mm/tlb.c                  | 197 ++++++++++++++++++++++---------------
- arch/x86/xen/mmu_pv.c              |   5 +-
- 5 files changed, 124 insertions(+), 89 deletions(-)
-
-diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
-index 6c05679c715b..d6b055b328f2 100644
---- a/arch/x86/include/asm/mmu_context.h
-+++ b/arch/x86/include/asm/mmu_context.h
-@@ -128,8 +128,10 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
- 
- static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
- {
--      if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
--              this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
-+      int cpu = smp_processor_id();
-+
-+      if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
-+              cpumask_clear_cpu(cpu, mm_cpumask(mm));
- }
- 
- static inline int init_new_context(struct task_struct *tsk,
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index 3a167c214560..6397275008db 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -95,7 +95,6 @@ struct tlb_state {
-        * mode even if we've already switched back to swapper_pg_dir.
-        */
-       struct mm_struct *loaded_mm;
--      int state;
- 
-       /*
-        * Access to this CR4 shadow and to H/W CR4 is protected by
-@@ -318,9 +317,6 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
- void native_flush_tlb_others(const struct cpumask *cpumask,
-                            const struct flush_tlb_info *info);
- 
--#define TLBSTATE_OK   1
--#define TLBSTATE_LAZY 2
--
- static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
-                                       struct mm_struct *mm)
- {
-diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
-index df2624b091a7..c86dc071bb10 100644
---- a/arch/x86/mm/init.c
-+++ b/arch/x86/mm/init.c
-@@ -849,7 +849,6 @@ void __init zone_sizes_init(void)
- 
- DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
-       .loaded_mm = &init_mm,
--      .state = 0,
-       .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
- };
- EXPORT_SYMBOL_GPL(cpu_tlbstate);
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index 4e5a5ddb9e4d..0982c997d36f 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -45,8 +45,8 @@ void leave_mm(int cpu)
-       if (loaded_mm == &init_mm)
-               return;
- 
--      if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
--              BUG();
-+      /* Warn if we're not lazy. */
-+      WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));
- 
-       switch_mm(NULL, &init_mm, NULL);
- }
-@@ -65,94 +65,117 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
- void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-                       struct task_struct *tsk)
- {
--      unsigned cpu = smp_processor_id();
-       struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
-+      unsigned cpu = smp_processor_id();
-+      u64 next_tlb_gen;
- 
-       /*
--       * NB: The scheduler will call us with prev == next when
--       * switching from lazy TLB mode to normal mode if active_mm
--       * isn't changing.  When this happens, there is no guarantee
--       * that CR3 (and hence cpu_tlbstate.loaded_mm) matches next.
-+       * NB: The scheduler will call us with prev == next when switching
-+       * from lazy TLB mode to normal mode if active_mm isn't changing.
-+       * When this happens, we don't assume that CR3 (and hence
-+       * cpu_tlbstate.loaded_mm) matches next.
-        *
-        * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
-        */
- 
--      this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-+      /* We don't want flush_tlb_func_* to run concurrently with us. */
-+      if (IS_ENABLED(CONFIG_PROVE_LOCKING))
-+              WARN_ON_ONCE(!irqs_disabled());
-+
-+      /*
-+       * Verify that CR3 is what we think it is.  This will catch
-+       * hypothetical buggy code that directly switches to swapper_pg_dir
-+       * without going through leave_mm() / switch_mm_irqs_off().
-+       */
-+      VM_BUG_ON(read_cr3_pa() != __pa(real_prev->pgd));
- 
-       if (real_prev == next) {
--              /*
--               * There's nothing to do: we always keep the per-mm control
--               * regs in sync with cpu_tlbstate.loaded_mm.  Just
--               * sanity-check mm_cpumask.
--               */
--              if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(next))))
--                      cpumask_set_cpu(cpu, mm_cpumask(next));
--              return;
--      }
-+              VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
-+                        next->context.ctx_id);
-+
-+              if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
-+                      /*
-+                       * There's nothing to do: we weren't lazy, and we
-+                       * aren't changing our mm.  We don't need to flush
-+                       * anything, nor do we need to update CR3, CR4, or
-+                       * LDTR.
-+                       */
-+                      return;
-+              }
-+
-+              /* Resume remote flushes and then read tlb_gen. */
-+              cpumask_set_cpu(cpu, mm_cpumask(next));
-+              next_tlb_gen = atomic64_read(&next->context.tlb_gen);
-+
-+              if (this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen) < next_tlb_gen) {
-+                      /*
-+                       * Ideally, we'd have a flush_tlb() variant that
-+                       * takes the known CR3 value as input.  This would
-+                       * be faster on Xen PV and on hypothetical CPUs
-+                       * on which INVPCID is fast.
-+                       */
-+                      this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen,
-+                                     next_tlb_gen);
-+                      write_cr3(__pa(next->pgd));
-+
-+                      /*
-+                       * This gets called via leave_mm() in the idle path
-+                       * where RCU functions differently.  Tracing normally
-+                       * uses RCU, so we have to call the tracepoint
-+                       * specially here.
-+                       */
-+                      trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
-+                                              TLB_FLUSH_ALL);
-+              }
- 
--      if (IS_ENABLED(CONFIG_VMAP_STACK)) {
-               /*
--               * If our current stack is in vmalloc space and isn't
--               * mapped in the new pgd, we'll double-fault.  Forcibly
--               * map it.
-+               * We just exited lazy mode, which means that CR4 and/or LDTR
-+               * may be stale.  (Changes to the required CR4 and LDTR states
-+               * are not reflected in tlb_gen.)
-                */
--              unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
--
--              pgd_t *pgd = next->pgd + stack_pgd_index;
--
--              if (unlikely(pgd_none(*pgd)))
--                      set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
--      }
-+      } else {
-+              VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) ==
-+                        next->context.ctx_id);
-+
-+              if (IS_ENABLED(CONFIG_VMAP_STACK)) {
-+                      /*
-+                       * If our current stack is in vmalloc space and isn't
-+                       * mapped in the new pgd, we'll double-fault.  Forcibly
-+                       * map it.
-+                       */
-+                      unsigned int index = pgd_index(current_stack_pointer());
-+                      pgd_t *pgd = next->pgd + index;
-+
-+                      if (unlikely(pgd_none(*pgd)))
-+                              set_pgd(pgd, init_mm.pgd[index]);
-+              }
- 
--      this_cpu_write(cpu_tlbstate.loaded_mm, next);
--      this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
--      this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, atomic64_read(&next->context.tlb_gen));
-+              /* Stop remote flushes for the previous mm */
-+              if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
-+                      cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
- 
--      WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
--      cpumask_set_cpu(cpu, mm_cpumask(next));
-+              VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
- 
--      /*
--       * Re-load page tables.
--       *
--       * This logic has an ordering constraint:
--       *
--       *  CPU 0: Write to a PTE for 'next'
--       *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
--       *  CPU 1: set bit 1 in next's mm_cpumask
--       *  CPU 1: load from the PTE that CPU 0 writes (implicit)
--       *
--       * We need to prevent an outcome in which CPU 1 observes
--       * the new PTE value and CPU 0 observes bit 1 clear in
--       * mm_cpumask.  (If that occurs, then the IPI will never
--       * be sent, and CPU 0's TLB will contain a stale entry.)
--       *
--       * The bad outcome can occur if either CPU's load is
--       * reordered before that CPU's store, so both CPUs must
--       * execute full barriers to prevent this from happening.
--       *
--       * Thus, switch_mm needs a full barrier between the
--       * store to mm_cpumask and any operation that could load
--       * from next->pgd.  TLB fills are special and can happen
--       * due to instruction fetches or for no reason at all,
--       * and neither LOCK nor MFENCE orders them.
--       * Fortunately, load_cr3() is serializing and gives the
--       * ordering guarantee we need.
--       */
--      load_cr3(next->pgd);
-+              /*
-+               * Start remote flushes and then read tlb_gen.
-+               */
-+              cpumask_set_cpu(cpu, mm_cpumask(next));
-+              next_tlb_gen = atomic64_read(&next->context.tlb_gen);
- 
--      /*
--       * This gets called via leave_mm() in the idle path where RCU
--       * functions differently.  Tracing normally uses RCU, so we have to
--       * call the tracepoint specially here.
--       */
--      trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-+              this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
-+              this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, next_tlb_gen);
-+              this_cpu_write(cpu_tlbstate.loaded_mm, next);
-+              write_cr3(__pa(next->pgd));
- 
--      /* Stop flush ipis for the previous mm */
--      WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
--                   real_prev != &init_mm);
--      cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
-+              /*
-+               * This gets called via leave_mm() in the idle path where RCU
-+               * functions differently.  Tracing normally uses RCU, so we
-+               * have to call the tracepoint specially here.
-+               */
-+              trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
-+                                      TLB_FLUSH_ALL);
-+      }
- 
--      /* Load per-mm CR4 and LDTR state */
-       load_mm_cr4(next);
-       switch_ldt(real_prev, next);
- }
-@@ -186,13 +209,13 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
-       VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
-                  loaded_mm->context.ctx_id);
- 
--      if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
-+      if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
-               /*
--               * leave_mm() is adequate to handle any type of flush, and
--               * we would prefer not to receive further IPIs.  leave_mm()
--               * clears this CPU's bit in mm_cpumask().
-+               * We're in lazy mode -- don't flush.  We can get here on
-+               * remote flushes due to races and on local flushes if a
-+               * kernel thread coincidentally flushes the mm it's lazily
-+               * still using.
-                */
--              leave_mm(smp_processor_id());
-               return;
-       }
- 
-@@ -203,6 +226,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
-                * be handled can catch us all the way up, leaving no work for
-                * the second flush.
-                */
-+              trace_tlb_flush(reason, 0);
-               return;
-       }
- 
-@@ -304,6 +328,21 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
-                               (info->end - info->start) >> PAGE_SHIFT);
- 
-       if (is_uv_system()) {
-+              /*
-+               * This whole special case is confused.  UV has a "Broadcast
-+               * Assist Unit", which seems to be a fancy way to send IPIs.
-+               * Back when x86 used an explicit TLB flush IPI, UV was
-+               * optimized to use its own mechanism.  These days, x86 uses
-+               * smp_call_function_many(), but UV still uses a manual IPI,
-+               * and that IPI's action is out of date -- it does a manual
-+               * flush instead of calling flush_tlb_func_remote().  This
-+               * means that the percpu tlb_gen variables won't be updated
-+               * and we'll do pointless flushes on future context switches.
-+               *
-+               * Rather than hooking native_flush_tlb_others() here, I think
-+               * that UV should be updated so that smp_call_function_many(),
-+               * etc, are optimal on UV.
-+               */
-               unsigned int cpu;
- 
-               cpu = smp_processor_id();
-@@ -363,6 +402,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
- 
-       if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
-               flush_tlb_others(mm_cpumask(mm), &info);
-+
-       put_cpu();
- }
- 
-@@ -371,8 +411,6 @@ static void do_flush_tlb_all(void *info)
- {
-       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
-       __flush_tlb_all();
--      if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
--              leave_mm(smp_processor_id());
- }
- 
- void flush_tlb_all(void)
-@@ -425,6 +463,7 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
- 
-       if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
-               flush_tlb_others(&batch->cpumask, &info);
-+
-       cpumask_clear(&batch->cpumask);
- 
-       put_cpu();
-diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
-index 5f61b7e2e6b2..ba76f3ce997f 100644
---- a/arch/x86/xen/mmu_pv.c
-+++ b/arch/x86/xen/mmu_pv.c
-@@ -1005,14 +1005,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
-       /* Get the "official" set of cpus referring to our pagetable. */
-       if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
-               for_each_online_cpu(cpu) {
--                      if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
--                          && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
-+                      if (per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
-                               continue;
-                       smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1);
-               }
-               return;
-       }
--      cpumask_copy(mask, mm_cpumask(mm));
- 
-       /*
-        * It's possible that a vcpu may have a stale reference to our
-@@ -1021,6 +1019,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
-        * look at its actual current cr3 value, and force it to flush
-        * if needed.
-        */
-+      cpumask_clear(mask);
-       for_each_online_cpu(cpu) {
-               if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
-                       cpumask_set_cpu(cpu, mask);
--- 
-2.14.2
-
diff --git a/patches/kernel/0043-x86-mm-Track-the-TLB-s-tlb_gen-and-update-the-flushi.patch b/patches/kernel/0043-x86-mm-Track-the-TLB-s-tlb_gen-and-update-the-flushi.patch

new file mode 100644 (file)

index 0000000..2630f26
--- /dev/null
+++ b/patches/kernel/0043-x86-mm-Track-the-TLB-s-tlb_gen-and-update-the-flushi.patch
@@ -0,0 +1,279 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 29 Jun 2017 08:53:16 -0700
+Subject: [PATCH] x86/mm: Track the TLB's tlb_gen and update the flushing
+ algorithm
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+There are two kernel features that would benefit from tracking
+how up-to-date each CPU's TLB is in the case where IPIs aren't keeping
+it up to date in real time:
+
+ - Lazy mm switching currently works by switching to init_mm when
+   it would otherwise flush.  This is wasteful: there isn't fundamentally
+   any need to update CR3 at all when going lazy or when returning from
+   lazy mode, nor is there any need to receive flush IPIs at all.  Instead,
+   we should just stop trying to keep the TLB coherent when we go lazy and,
+   when unlazying, check whether we missed any flushes.
+
+ - PCID will let us keep recent user contexts alive in the TLB.  If we
+   start doing this, we need a way to decide whether those contexts are
+   up to date.
+
+On some paravirt systems, remote TLBs can be flushed without IPIs.
+This won't update the target CPUs' tlb_gens, which may cause
+unnecessary local flushes later on.  We can address this if it becomes
+a problem by carefully updating the target CPU's tlb_gen directly.
+
+By itself, this patch is a very minor optimization that avoids
+unnecessary flushes when multiple TLB flushes targeting the same CPU
+race.  The complexity in this patch would not be worth it on its own,
+but it will enable improved lazy TLB tracking and PCID.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/1210fb244bc9cbe7677f7f0b72db4d359675f24b.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit b0579ade7cd82391360e959cc844e50a160e8a96)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d34881c25f3c70228ed792fd62881185a25c4422)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/tlbflush.h |  43 +++++++++++++++--
+ arch/x86/mm/tlb.c               | 102 +++++++++++++++++++++++++++++++++++++---
+ 2 files changed, 135 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index f1f2e73b7b77..3a167c214560 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -82,6 +82,11 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+ #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
+ #endif
+ 
++struct tlb_context {
++      u64 ctx_id;
++      u64 tlb_gen;
++};
++
+ struct tlb_state {
+       /*
+        * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
+@@ -97,6 +102,21 @@ struct tlb_state {
+        * disabling interrupts when modifying either one.
+        */
+       unsigned long cr4;
++
++      /*
++       * This is a list of all contexts that might exist in the TLB.
++       * Since we don't yet use PCID, there is only one context.
++       *
++       * For each context, ctx_id indicates which mm the TLB's user
++       * entries came from.  As an invariant, the TLB will never
++       * contain entries that are out-of-date as when that mm reached
++       * the tlb_gen in the list.
++       *
++       * To be clear, this means that it's legal for the TLB code to
++       * flush the TLB without updating tlb_gen.  This can happen
++       * (for now, at least) due to paravirt remote flushes.
++       */
++      struct tlb_context ctxs[1];
+ };
+ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+ 
+@@ -256,9 +276,26 @@ static inline void __flush_tlb_one(unsigned long addr)
+  * and page-granular flushes are available only on i486 and up.
+  */
+ struct flush_tlb_info {
+-      struct mm_struct *mm;
+-      unsigned long start;
+-      unsigned long end;
++      /*
++       * We support several kinds of flushes.
++       *
++       * - Fully flush a single mm.  .mm will be set, .end will be
++       *   TLB_FLUSH_ALL, and .new_tlb_gen will be the tlb_gen to
++       *   which the IPI sender is trying to catch us up.
++       *
++       * - Partially flush a single mm.  .mm will be set, .start and
++       *   .end will indicate the range, and .new_tlb_gen will be set
++       *   such that the changes between generation .new_tlb_gen-1 and
++       *   .new_tlb_gen are entirely contained in the indicated range.
++       *
++       * - Fully flush all mms whose tlb_gens have been updated.  .mm
++       *   will be NULL, .end will be TLB_FLUSH_ALL, and .new_tlb_gen
++       *   will be zero.
++       */
++      struct mm_struct        *mm;
++      unsigned long           start;
++      unsigned long           end;
++      u64                     new_tlb_gen;
+ };
+ 
+ #define local_flush_tlb() __flush_tlb()
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 14f4f8f66aa8..4e5a5ddb9e4d 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -105,6 +105,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+       }
+ 
+       this_cpu_write(cpu_tlbstate.loaded_mm, next);
++      this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
++      this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, atomic64_read(&next->context.tlb_gen));
+ 
+       WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
+       cpumask_set_cpu(cpu, mm_cpumask(next));
+@@ -155,25 +157,102 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+       switch_ldt(real_prev, next);
+ }
+ 
++/*
++ * flush_tlb_func_common()'s memory ordering requirement is that any
++ * TLB fills that happen after we flush the TLB are ordered after we
++ * read active_mm's tlb_gen.  We don't need any explicit barriers
++ * because all x86 flush operations are serializing and the
++ * atomic64_read operation won't be reordered by the compiler.
++ */
+ static void flush_tlb_func_common(const struct flush_tlb_info *f,
+                                 bool local, enum tlb_flush_reason reason)
+ {
++      /*
++       * We have three different tlb_gen values in here.  They are:
++       *
++       * - mm_tlb_gen:     the latest generation.
++       * - local_tlb_gen:  the generation that this CPU has already caught
++       *                   up to.
++       * - f->new_tlb_gen: the generation that the requester of the flush
++       *                   wants us to catch up to.
++       */
++      struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
++      u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
++      u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen);
++
+       /* This code cannot presently handle being reentered. */
+       VM_WARN_ON(!irqs_disabled());
+ 
++      VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
++                 loaded_mm->context.ctx_id);
++
+       if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
++              /*
++               * leave_mm() is adequate to handle any type of flush, and
++               * we would prefer not to receive further IPIs.  leave_mm()
++               * clears this CPU's bit in mm_cpumask().
++               */
+               leave_mm(smp_processor_id());
+               return;
+       }
+ 
+-      if (f->end == TLB_FLUSH_ALL) {
+-              local_flush_tlb();
+-              if (local)
+-                      count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+-              trace_tlb_flush(reason, TLB_FLUSH_ALL);
+-      } else {
++      if (unlikely(local_tlb_gen == mm_tlb_gen)) {
++              /*
++               * There's nothing to do: we're already up to date.  This can
++               * happen if two concurrent flushes happen -- the first flush to
++               * be handled can catch us all the way up, leaving no work for
++               * the second flush.
++               */
++              return;
++      }
++
++      WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
++      WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);
++
++      /*
++       * If we get to this point, we know that our TLB is out of date.
++       * This does not strictly imply that we need to flush (it's
++       * possible that f->new_tlb_gen <= local_tlb_gen), but we're
++       * going to need to flush in the very near future, so we might
++       * as well get it over with.
++       *
++       * The only question is whether to do a full or partial flush.
++       *
++       * We do a partial flush if requested and two extra conditions
++       * are met:
++       *
++       * 1. f->new_tlb_gen == local_tlb_gen + 1.  We have an invariant that
++       *    we've always done all needed flushes to catch up to
++       *    local_tlb_gen.  If, for example, local_tlb_gen == 2 and
++       *    f->new_tlb_gen == 3, then we know that the flush needed to bring
++       *    us up to date for tlb_gen 3 is the partial flush we're
++       *    processing.
++       *
++       *    As an example of why this check is needed, suppose that there
++       *    are two concurrent flushes.  The first is a full flush that
++       *    changes context.tlb_gen from 1 to 2.  The second is a partial
++       *    flush that changes context.tlb_gen from 2 to 3.  If they get
++       *    processed on this CPU in reverse order, we'll see
++       *     local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
++       *    If we were to use __flush_tlb_single() and set local_tlb_gen to
++       *    3, we'd be break the invariant: we'd update local_tlb_gen above
++       *    1 without the full flush that's needed for tlb_gen 2.
++       *
++       * 2. f->new_tlb_gen == mm_tlb_gen.  This is purely an optimiation.
++       *    Partial TLB flushes are not all that much cheaper than full TLB
++       *    flushes, so it seems unlikely that it would be a performance win
++       *    to do a partial flush if that won't bring our TLB fully up to
++       *    date.  By doing a full flush instead, we can increase
++       *    local_tlb_gen all the way to mm_tlb_gen and we can probably
++       *    avoid another flush in the very near future.
++       */
++      if (f->end != TLB_FLUSH_ALL &&
++          f->new_tlb_gen == local_tlb_gen + 1 &&
++          f->new_tlb_gen == mm_tlb_gen) {
++              /* Partial flush */
+               unsigned long addr;
+               unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
++
+               addr = f->start;
+               while (addr < f->end) {
+                       __flush_tlb_single(addr);
+@@ -182,7 +261,16 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
+               if (local)
+                       count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
+               trace_tlb_flush(reason, nr_pages);
++      } else {
++              /* Full flush. */
++              local_flush_tlb();
++              if (local)
++                      count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
++              trace_tlb_flush(reason, TLB_FLUSH_ALL);
+       }
++
++      /* Both paths above update our state to mm_tlb_gen. */
++      this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, mm_tlb_gen);
+ }
+ 
+ static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
+@@ -253,7 +341,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+       cpu = get_cpu();
+ 
+       /* This is also a barrier that synchronizes with switch_mm(). */
+-      inc_mm_tlb_gen(mm);
++      info.new_tlb_gen = inc_mm_tlb_gen(mm);
+ 
+       /* Should we flush just the requested range? */
+       if ((end != TLB_FLUSH_ALL) &&
+-- 
+2.14.2
+
diff --git a/patches/kernel/0044-x86-mm-Implement-PCID-based-optimization-try-to-pres.patch b/patches/kernel/0044-x86-mm-Implement-PCID-based-optimization-try-to-pres.patch

deleted file mode 100644 (file)

index 0b9df59..0000000
--- a/patches/kernel/0044-x86-mm-Implement-PCID-based-optimization-try-to-pres.patch
+++ /dev/null
@@ -1,340 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 24 Jul 2017 21:41:38 -0700
-Subject: [PATCH] x86/mm: Implement PCID based optimization: try to preserve
- old TLB entries using PCID
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-PCID is a "process context ID" -- it's what other architectures call
-an address space ID.  Every non-global TLB entry is tagged with a
-PCID, only TLB entries that match the currently selected PCID are
-used, and we can switch PGDs without flushing the TLB.  x86's
-PCID is 12 bits.
-
-This is an unorthodox approach to using PCID.  x86's PCID is far too
-short to uniquely identify a process, and we can't even really
-uniquely identify a running process because there are monster
-systems with over 4096 CPUs.  To make matters worse, past attempts
-to use all 12 PCID bits have resulted in slowdowns instead of
-speedups.
-
-This patch uses PCID differently.  We use a PCID to identify a
-recently-used mm on a per-cpu basis.  An mm has no fixed PCID
-binding at all; instead, we give it a fresh PCID each time it's
-loaded except in cases where we want to preserve the TLB, in which
-case we reuse a recent value.
-
-Here are some benchmark results, done on a Skylake laptop at 2.3 GHz
-(turbo off, intel_pstate requesting max performance) under KVM with
-the guest using idle=poll (to avoid artifacts when bouncing between
-CPUs).  I haven't done any real statistics here -- I just ran them
-in a loop and picked the fastest results that didn't look like
-outliers.  Unpatched means commit a4eb8b993554, so all the
-bookkeeping overhead is gone.
-
-ping-pong between two mms on the same CPU using eventfd:
-
-  patched:         1.22µs
-  patched, nopcid: 1.33µs
-  unpatched:       1.34µs
-
-Same ping-pong, but now touch 512 pages (all zero-page to minimize
-cache misses) each iteration.  dTLB misses are measured by
-dtlb_load_misses.miss_causes_a_walk:
-
-  patched:         1.8µs  11M  dTLB misses
-  patched, nopcid: 6.2µs, 207M dTLB misses
-  unpatched:       6.1µs, 190M dTLB misses
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Arjan van de Ven <arjan@linux.intel.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mel Gorman <mgorman@suse.de>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-Link: http://lkml.kernel.org/r/9ee75f17a81770feed616358e6860d98a2a5b1e7.1500957502.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit 10af6235e0d327d42e1bad974385197817923dc1)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d833a976288cdcf7fb1dabb48ebf614ebf6a311c)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/mmu_context.h     |  3 ++
- arch/x86/include/asm/processor-flags.h |  2 +
- arch/x86/include/asm/tlbflush.h        | 18 +++++++-
- arch/x86/mm/init.c                     |  1 +
- arch/x86/mm/tlb.c                      | 84 +++++++++++++++++++++++++---------
- 5 files changed, 85 insertions(+), 23 deletions(-)
-
-diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
-index d6b055b328f2..7ae318c340d9 100644
---- a/arch/x86/include/asm/mmu_context.h
-+++ b/arch/x86/include/asm/mmu_context.h
-@@ -298,6 +298,9 @@ static inline unsigned long __get_current_cr3_fast(void)
- {
-       unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
- 
-+      if (static_cpu_has(X86_FEATURE_PCID))
-+              cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
-+
-       /* For now, be very restrictive about when this can be called. */
-       VM_WARN_ON(in_nmi() || preemptible());
- 
-diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
-index 79aa2f98398d..791b60199aa4 100644
---- a/arch/x86/include/asm/processor-flags.h
-+++ b/arch/x86/include/asm/processor-flags.h
-@@ -35,6 +35,7 @@
- /* Mask off the address space ID bits. */
- #define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull
- #define CR3_PCID_MASK 0xFFFull
-+#define CR3_NOFLUSH (1UL << 63)
- #else
- /*
-  * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
-@@ -42,6 +43,7 @@
-  */
- #define CR3_ADDR_MASK 0xFFFFFFFFull
- #define CR3_PCID_MASK 0ull
-+#define CR3_NOFLUSH 0
- #endif
- 
- #endif /* _ASM_X86_PROCESSOR_FLAGS_H */
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index 6397275008db..d23e61dc0640 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -82,6 +82,12 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
- #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
- #endif
- 
-+/*
-+ * 6 because 6 should be plenty and struct tlb_state will fit in
-+ * two cache lines.
-+ */
-+#define TLB_NR_DYN_ASIDS 6
-+
- struct tlb_context {
-       u64 ctx_id;
-       u64 tlb_gen;
-@@ -95,6 +101,8 @@ struct tlb_state {
-        * mode even if we've already switched back to swapper_pg_dir.
-        */
-       struct mm_struct *loaded_mm;
-+      u16 loaded_mm_asid;
-+      u16 next_asid;
- 
-       /*
-        * Access to this CR4 shadow and to H/W CR4 is protected by
-@@ -104,7 +112,8 @@ struct tlb_state {
- 
-       /*
-        * This is a list of all contexts that might exist in the TLB.
--       * Since we don't yet use PCID, there is only one context.
-+       * There is one per ASID that we use, and the ASID (what the
-+       * CPU calls PCID) is the index into ctxts.
-        *
-        * For each context, ctx_id indicates which mm the TLB's user
-        * entries came from.  As an invariant, the TLB will never
-@@ -114,8 +123,13 @@ struct tlb_state {
-        * To be clear, this means that it's legal for the TLB code to
-        * flush the TLB without updating tlb_gen.  This can happen
-        * (for now, at least) due to paravirt remote flushes.
-+       *
-+       * NB: context 0 is a bit special, since it's also used by
-+       * various bits of init code.  This is fine -- code that
-+       * isn't aware of PCID will end up harmlessly flushing
-+       * context 0.
-        */
--      struct tlb_context ctxs[1];
-+      struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
- };
- DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
- 
-diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
-index c86dc071bb10..af5c1ed21d43 100644
---- a/arch/x86/mm/init.c
-+++ b/arch/x86/mm/init.c
-@@ -849,6 +849,7 @@ void __init zone_sizes_init(void)
- 
- DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
-       .loaded_mm = &init_mm,
-+      .next_asid = 1,
-       .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
- };
- EXPORT_SYMBOL_GPL(cpu_tlbstate);
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index 0982c997d36f..57943b4d8f2e 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -30,6 +30,40 @@
- 
- atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
- 
-+static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
-+                          u16 *new_asid, bool *need_flush)
-+{
-+      u16 asid;
-+
-+      if (!static_cpu_has(X86_FEATURE_PCID)) {
-+              *new_asid = 0;
-+              *need_flush = true;
-+              return;
-+      }
-+
-+      for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
-+              if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
-+                  next->context.ctx_id)
-+                      continue;
-+
-+              *new_asid = asid;
-+              *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
-+                             next_tlb_gen);
-+              return;
-+      }
-+
-+      /*
-+       * We don't currently own an ASID slot on this CPU.
-+       * Allocate a slot.
-+       */
-+      *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
-+      if (*new_asid >= TLB_NR_DYN_ASIDS) {
-+              *new_asid = 0;
-+              this_cpu_write(cpu_tlbstate.next_asid, 1);
-+      }
-+      *need_flush = true;
-+}
-+
- void leave_mm(int cpu)
- {
-       struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
-@@ -66,6 +100,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-                       struct task_struct *tsk)
- {
-       struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
-+      u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
-       unsigned cpu = smp_processor_id();
-       u64 next_tlb_gen;
- 
-@@ -85,12 +120,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-       /*
-        * Verify that CR3 is what we think it is.  This will catch
-        * hypothetical buggy code that directly switches to swapper_pg_dir
--       * without going through leave_mm() / switch_mm_irqs_off().
-+       * without going through leave_mm() / switch_mm_irqs_off() or that
-+       * does something like write_cr3(read_cr3_pa()).
-        */
--      VM_BUG_ON(read_cr3_pa() != __pa(real_prev->pgd));
-+      VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
- 
-       if (real_prev == next) {
--              VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
-+              VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
-                         next->context.ctx_id);
- 
-               if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
-@@ -107,16 +143,17 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-               cpumask_set_cpu(cpu, mm_cpumask(next));
-               next_tlb_gen = atomic64_read(&next->context.tlb_gen);
- 
--              if (this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen) < next_tlb_gen) {
-+              if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) <
-+                  next_tlb_gen) {
-                       /*
-                        * Ideally, we'd have a flush_tlb() variant that
-                        * takes the known CR3 value as input.  This would
-                        * be faster on Xen PV and on hypothetical CPUs
-                        * on which INVPCID is fast.
-                        */
--                      this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen,
-+                      this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
-                                      next_tlb_gen);
--                      write_cr3(__pa(next->pgd));
-+                      write_cr3(__pa(next->pgd) | prev_asid);
- 
-                       /*
-                        * This gets called via leave_mm() in the idle path
-@@ -134,8 +171,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-                * are not reflected in tlb_gen.)
-                */
-       } else {
--              VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) ==
--                        next->context.ctx_id);
-+              u16 new_asid;
-+              bool need_flush;
- 
-               if (IS_ENABLED(CONFIG_VMAP_STACK)) {
-                       /*
-@@ -162,18 +199,22 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-               cpumask_set_cpu(cpu, mm_cpumask(next));
-               next_tlb_gen = atomic64_read(&next->context.tlb_gen);
- 
--              this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
--              this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, next_tlb_gen);
--              this_cpu_write(cpu_tlbstate.loaded_mm, next);
--              write_cr3(__pa(next->pgd));
-+              choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
- 
--              /*
--               * This gets called via leave_mm() in the idle path where RCU
--               * functions differently.  Tracing normally uses RCU, so we
--               * have to call the tracepoint specially here.
--               */
--              trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
-+              if (need_flush) {
-+                      this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
-+                      this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-+                      write_cr3(__pa(next->pgd) | new_asid);
-+                      trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
-                                       TLB_FLUSH_ALL);
-+              } else {
-+                      /* The new ASID is already up to date. */
-+                      write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
-+                      trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
-+              }
-+
-+              this_cpu_write(cpu_tlbstate.loaded_mm, next);
-+              this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
-       }
- 
-       load_mm_cr4(next);
-@@ -200,13 +241,14 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
-        *                   wants us to catch up to.
-        */
-       struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
-+      u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
-       u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
--      u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen);
-+      u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
- 
-       /* This code cannot presently handle being reentered. */
-       VM_WARN_ON(!irqs_disabled());
- 
--      VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
-+      VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
-                  loaded_mm->context.ctx_id);
- 
-       if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
-@@ -294,7 +336,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
-       }
- 
-       /* Both paths above update our state to mm_tlb_gen. */
--      this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, mm_tlb_gen);
-+      this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
- }
- 
- static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
--- 
-2.14.2
-
diff --git a/patches/kernel/0044-x86-mm-Rework-lazy-TLB-mode-and-TLB-freshness-tracki.patch b/patches/kernel/0044-x86-mm-Rework-lazy-TLB-mode-and-TLB-freshness-tracki.patch

new file mode 100644 (file)

index 0000000..70f93ef
--- /dev/null
+++ b/patches/kernel/0044-x86-mm-Rework-lazy-TLB-mode-and-TLB-freshness-tracki.patch
@@ -0,0 +1,453 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 29 Jun 2017 08:53:17 -0700
+Subject: [PATCH] x86/mm: Rework lazy TLB mode and TLB freshness tracking
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+x86's lazy TLB mode used to be fairly weak -- it would switch to
+init_mm the first time it tried to flush a lazy TLB.  This meant an
+unnecessary CR3 write and, if the flush was remote, an unnecessary
+IPI.
+
+Rewrite it entirely.  When we enter lazy mode, we simply remove the
+CPU from mm_cpumask.  This means that we need a way to figure out
+whether we've missed a flush when we switch back out of lazy mode.
+I use the tlb_gen machinery to track whether a context is up to
+date.
+
+Note to reviewers: this patch, by itself, looks a bit odd.  I'm
+using an array of length 1 containing (ctx_id, tlb_gen) rather than
+just storing tlb_gen, and making it an array isn't necessary yet.
+I'm doing this because the next few patches add PCID support, and,
+with PCID, we need ctx_id, and the array will end up with a length
+greater than 1.  Making it an array now means that there will be
+less churn and therefore less stress on your eyeballs.
+
+NB: This is dubious but, AFAICT, still correct on Xen and UV.
+xen_exit_mmap() uses mm_cpumask() for nefarious purposes and this
+patch changes the way that mm_cpumask() works.  This should be okay,
+since Xen *also* iterates all online CPUs to find all the CPUs it
+needs to twiddle.
+
+The UV tlbflush code is rather dated and should be changed.
+
+Here are some benchmark results, done on a Skylake laptop at 2.3 GHz
+(turbo off, intel_pstate requesting max performance) under KVM with
+the guest using idle=poll (to avoid artifacts when bouncing between
+CPUs).  I haven't done any real statistics here -- I just ran them
+in a loop and picked the fastest results that didn't look like
+outliers.  Unpatched means commit a4eb8b993554, so all the
+bookkeeping overhead is gone.
+
+MADV_DONTNEED; touch the page; switch CPUs using sched_setaffinity.  In
+an unpatched kernel, MADV_DONTNEED will send an IPI to the previous CPU.
+This is intended to be a nearly worst-case test.
+
+  patched:         13.4µs
+  unpatched:       21.6µs
+
+Vitaly's pthread_mmap microbenchmark with 8 threads (on four cores),
+nrounds = 100, 256M data
+
+  patched:         1.1 seconds or so
+  unpatched:       1.9 seconds or so
+
+The speedup on Vitaly's test appears to be because it spends a lot
+of time blocked on mmap_sem, and this patch avoids sending IPIs to
+blocked CPUs.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andrew Banman <abanman@sgi.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dimitri Sivanich <sivanich@sgi.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Mike Travis <travis@sgi.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/ddf2c92962339f4ba39d8fc41b853936ec0b44f1.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 94b1b03b519b81c494900cb112aa00ed205cc2d9)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit b381b7ae452f2bc6384507a897247be7c93a71cc)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/mmu_context.h |   6 +-
+ arch/x86/include/asm/tlbflush.h    |   4 -
+ arch/x86/mm/init.c                 |   1 -
+ arch/x86/mm/tlb.c                  | 197 ++++++++++++++++++++++---------------
+ arch/x86/xen/mmu_pv.c              |   5 +-
+ 5 files changed, 124 insertions(+), 89 deletions(-)
+
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index 6c05679c715b..d6b055b328f2 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -128,8 +128,10 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
+ 
+ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+ {
+-      if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+-              this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
++      int cpu = smp_processor_id();
++
++      if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
++              cpumask_clear_cpu(cpu, mm_cpumask(mm));
+ }
+ 
+ static inline int init_new_context(struct task_struct *tsk,
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 3a167c214560..6397275008db 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -95,7 +95,6 @@ struct tlb_state {
+        * mode even if we've already switched back to swapper_pg_dir.
+        */
+       struct mm_struct *loaded_mm;
+-      int state;
+ 
+       /*
+        * Access to this CR4 shadow and to H/W CR4 is protected by
+@@ -318,9 +317,6 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
+ void native_flush_tlb_others(const struct cpumask *cpumask,
+                            const struct flush_tlb_info *info);
+ 
+-#define TLBSTATE_OK   1
+-#define TLBSTATE_LAZY 2
+-
+ static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
+                                       struct mm_struct *mm)
+ {
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index df2624b091a7..c86dc071bb10 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -849,7 +849,6 @@ void __init zone_sizes_init(void)
+ 
+ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+       .loaded_mm = &init_mm,
+-      .state = 0,
+       .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
+ };
+ EXPORT_SYMBOL_GPL(cpu_tlbstate);
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 4e5a5ddb9e4d..0982c997d36f 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -45,8 +45,8 @@ void leave_mm(int cpu)
+       if (loaded_mm == &init_mm)
+               return;
+ 
+-      if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+-              BUG();
++      /* Warn if we're not lazy. */
++      WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));
+ 
+       switch_mm(NULL, &init_mm, NULL);
+ }
+@@ -65,94 +65,117 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+                       struct task_struct *tsk)
+ {
+-      unsigned cpu = smp_processor_id();
+       struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
++      unsigned cpu = smp_processor_id();
++      u64 next_tlb_gen;
+ 
+       /*
+-       * NB: The scheduler will call us with prev == next when
+-       * switching from lazy TLB mode to normal mode if active_mm
+-       * isn't changing.  When this happens, there is no guarantee
+-       * that CR3 (and hence cpu_tlbstate.loaded_mm) matches next.
++       * NB: The scheduler will call us with prev == next when switching
++       * from lazy TLB mode to normal mode if active_mm isn't changing.
++       * When this happens, we don't assume that CR3 (and hence
++       * cpu_tlbstate.loaded_mm) matches next.
+        *
+        * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
+        */
+ 
+-      this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
++      /* We don't want flush_tlb_func_* to run concurrently with us. */
++      if (IS_ENABLED(CONFIG_PROVE_LOCKING))
++              WARN_ON_ONCE(!irqs_disabled());
++
++      /*
++       * Verify that CR3 is what we think it is.  This will catch
++       * hypothetical buggy code that directly switches to swapper_pg_dir
++       * without going through leave_mm() / switch_mm_irqs_off().
++       */
++      VM_BUG_ON(read_cr3_pa() != __pa(real_prev->pgd));
+ 
+       if (real_prev == next) {
+-              /*
+-               * There's nothing to do: we always keep the per-mm control
+-               * regs in sync with cpu_tlbstate.loaded_mm.  Just
+-               * sanity-check mm_cpumask.
+-               */
+-              if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(next))))
+-                      cpumask_set_cpu(cpu, mm_cpumask(next));
+-              return;
+-      }
++              VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
++                        next->context.ctx_id);
++
++              if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
++                      /*
++                       * There's nothing to do: we weren't lazy, and we
++                       * aren't changing our mm.  We don't need to flush
++                       * anything, nor do we need to update CR3, CR4, or
++                       * LDTR.
++                       */
++                      return;
++              }
++
++              /* Resume remote flushes and then read tlb_gen. */
++              cpumask_set_cpu(cpu, mm_cpumask(next));
++              next_tlb_gen = atomic64_read(&next->context.tlb_gen);
++
++              if (this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen) < next_tlb_gen) {
++                      /*
++                       * Ideally, we'd have a flush_tlb() variant that
++                       * takes the known CR3 value as input.  This would
++                       * be faster on Xen PV and on hypothetical CPUs
++                       * on which INVPCID is fast.
++                       */
++                      this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen,
++                                     next_tlb_gen);
++                      write_cr3(__pa(next->pgd));
++
++                      /*
++                       * This gets called via leave_mm() in the idle path
++                       * where RCU functions differently.  Tracing normally
++                       * uses RCU, so we have to call the tracepoint
++                       * specially here.
++                       */
++                      trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
++                                              TLB_FLUSH_ALL);
++              }
+ 
+-      if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+               /*
+-               * If our current stack is in vmalloc space and isn't
+-               * mapped in the new pgd, we'll double-fault.  Forcibly
+-               * map it.
++               * We just exited lazy mode, which means that CR4 and/or LDTR
++               * may be stale.  (Changes to the required CR4 and LDTR states
++               * are not reflected in tlb_gen.)
+                */
+-              unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
+-
+-              pgd_t *pgd = next->pgd + stack_pgd_index;
+-
+-              if (unlikely(pgd_none(*pgd)))
+-                      set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
+-      }
++      } else {
++              VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) ==
++                        next->context.ctx_id);
++
++              if (IS_ENABLED(CONFIG_VMAP_STACK)) {
++                      /*
++                       * If our current stack is in vmalloc space and isn't
++                       * mapped in the new pgd, we'll double-fault.  Forcibly
++                       * map it.
++                       */
++                      unsigned int index = pgd_index(current_stack_pointer());
++                      pgd_t *pgd = next->pgd + index;
++
++                      if (unlikely(pgd_none(*pgd)))
++                              set_pgd(pgd, init_mm.pgd[index]);
++              }
+ 
+-      this_cpu_write(cpu_tlbstate.loaded_mm, next);
+-      this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
+-      this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, atomic64_read(&next->context.tlb_gen));
++              /* Stop remote flushes for the previous mm */
++              if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
++                      cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+ 
+-      WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
+-      cpumask_set_cpu(cpu, mm_cpumask(next));
++              VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
+ 
+-      /*
+-       * Re-load page tables.
+-       *
+-       * This logic has an ordering constraint:
+-       *
+-       *  CPU 0: Write to a PTE for 'next'
+-       *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
+-       *  CPU 1: set bit 1 in next's mm_cpumask
+-       *  CPU 1: load from the PTE that CPU 0 writes (implicit)
+-       *
+-       * We need to prevent an outcome in which CPU 1 observes
+-       * the new PTE value and CPU 0 observes bit 1 clear in
+-       * mm_cpumask.  (If that occurs, then the IPI will never
+-       * be sent, and CPU 0's TLB will contain a stale entry.)
+-       *
+-       * The bad outcome can occur if either CPU's load is
+-       * reordered before that CPU's store, so both CPUs must
+-       * execute full barriers to prevent this from happening.
+-       *
+-       * Thus, switch_mm needs a full barrier between the
+-       * store to mm_cpumask and any operation that could load
+-       * from next->pgd.  TLB fills are special and can happen
+-       * due to instruction fetches or for no reason at all,
+-       * and neither LOCK nor MFENCE orders them.
+-       * Fortunately, load_cr3() is serializing and gives the
+-       * ordering guarantee we need.
+-       */
+-      load_cr3(next->pgd);
++              /*
++               * Start remote flushes and then read tlb_gen.
++               */
++              cpumask_set_cpu(cpu, mm_cpumask(next));
++              next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+ 
+-      /*
+-       * This gets called via leave_mm() in the idle path where RCU
+-       * functions differently.  Tracing normally uses RCU, so we have to
+-       * call the tracepoint specially here.
+-       */
+-      trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
++              this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
++              this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, next_tlb_gen);
++              this_cpu_write(cpu_tlbstate.loaded_mm, next);
++              write_cr3(__pa(next->pgd));
+ 
+-      /* Stop flush ipis for the previous mm */
+-      WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
+-                   real_prev != &init_mm);
+-      cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
++              /*
++               * This gets called via leave_mm() in the idle path where RCU
++               * functions differently.  Tracing normally uses RCU, so we
++               * have to call the tracepoint specially here.
++               */
++              trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
++                                      TLB_FLUSH_ALL);
++      }
+ 
+-      /* Load per-mm CR4 and LDTR state */
+       load_mm_cr4(next);
+       switch_ldt(real_prev, next);
+ }
+@@ -186,13 +209,13 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
+       VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
+                  loaded_mm->context.ctx_id);
+ 
+-      if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
++      if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
+               /*
+-               * leave_mm() is adequate to handle any type of flush, and
+-               * we would prefer not to receive further IPIs.  leave_mm()
+-               * clears this CPU's bit in mm_cpumask().
++               * We're in lazy mode -- don't flush.  We can get here on
++               * remote flushes due to races and on local flushes if a
++               * kernel thread coincidentally flushes the mm it's lazily
++               * still using.
+                */
+-              leave_mm(smp_processor_id());
+               return;
+       }
+ 
+@@ -203,6 +226,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
+                * be handled can catch us all the way up, leaving no work for
+                * the second flush.
+                */
++              trace_tlb_flush(reason, 0);
+               return;
+       }
+ 
+@@ -304,6 +328,21 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
+                               (info->end - info->start) >> PAGE_SHIFT);
+ 
+       if (is_uv_system()) {
++              /*
++               * This whole special case is confused.  UV has a "Broadcast
++               * Assist Unit", which seems to be a fancy way to send IPIs.
++               * Back when x86 used an explicit TLB flush IPI, UV was
++               * optimized to use its own mechanism.  These days, x86 uses
++               * smp_call_function_many(), but UV still uses a manual IPI,
++               * and that IPI's action is out of date -- it does a manual
++               * flush instead of calling flush_tlb_func_remote().  This
++               * means that the percpu tlb_gen variables won't be updated
++               * and we'll do pointless flushes on future context switches.
++               *
++               * Rather than hooking native_flush_tlb_others() here, I think
++               * that UV should be updated so that smp_call_function_many(),
++               * etc, are optimal on UV.
++               */
+               unsigned int cpu;
+ 
+               cpu = smp_processor_id();
+@@ -363,6 +402,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ 
+       if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
+               flush_tlb_others(mm_cpumask(mm), &info);
++
+       put_cpu();
+ }
+ 
+@@ -371,8 +411,6 @@ static void do_flush_tlb_all(void *info)
+ {
+       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+       __flush_tlb_all();
+-      if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
+-              leave_mm(smp_processor_id());
+ }
+ 
+ void flush_tlb_all(void)
+@@ -425,6 +463,7 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+ 
+       if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
+               flush_tlb_others(&batch->cpumask, &info);
++
+       cpumask_clear(&batch->cpumask);
+ 
+       put_cpu();
+diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
+index 5f61b7e2e6b2..ba76f3ce997f 100644
+--- a/arch/x86/xen/mmu_pv.c
++++ b/arch/x86/xen/mmu_pv.c
+@@ -1005,14 +1005,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
+       /* Get the "official" set of cpus referring to our pagetable. */
+       if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
+               for_each_online_cpu(cpu) {
+-                      if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
+-                          && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
++                      if (per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
+                               continue;
+                       smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1);
+               }
+               return;
+       }
+-      cpumask_copy(mask, mm_cpumask(mm));
+ 
+       /*
+        * It's possible that a vcpu may have a stale reference to our
+@@ -1021,6 +1019,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
+        * look at its actual current cr3 value, and force it to flush
+        * if needed.
+        */
++      cpumask_clear(mask);
+       for_each_online_cpu(cpu) {
+               if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
+                       cpumask_set_cpu(cpu, mask);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0045-x86-mm-Factor-out-CR3-building-code.patch b/patches/kernel/0045-x86-mm-Factor-out-CR3-building-code.patch

deleted file mode 100644 (file)

index 01f7292..0000000
--- a/patches/kernel/0045-x86-mm-Factor-out-CR3-building-code.patch
+++ /dev/null
@@ -1,176 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sun, 17 Sep 2017 09:03:48 -0700
-Subject: [PATCH] x86/mm: Factor out CR3-building code
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Current, the code that assembles a value to load into CR3 is
-open-coded everywhere.  Factor it out into helpers build_cr3() and
-build_cr3_noflush().
-
-This makes one semantic change: __get_current_cr3_fast() was wrong
-on SME systems.  No one noticed because the only caller is in the
-VMX code, and there are no CPUs with both SME and VMX.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: Tom Lendacky <Thomas.Lendacky@amd.com>
-Link: http://lkml.kernel.org/r/ce350cf11e93e2842d14d0b95b0199c7d881f527.1505663533.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit 47061a24e2ee5bd8a40d473d47a5bd823fa0081f)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 72be211bac7be521f128d419d63cae38ba60ace8)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/mmu_context.h | 15 ++++++---
- arch/x86/mm/tlb.c                  | 68 +++++++++++++++++++++++++++++++++++---
- 2 files changed, 75 insertions(+), 8 deletions(-)
-
-diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
-index 7ae318c340d9..a999ba6b721f 100644
---- a/arch/x86/include/asm/mmu_context.h
-+++ b/arch/x86/include/asm/mmu_context.h
-@@ -286,6 +286,15 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
-       return __pkru_allows_pkey(vma_pkey(vma), write);
- }
- 
-+static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
-+{
-+      return __sme_pa(mm->pgd) | asid;
-+}
-+
-+static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
-+{
-+      return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
-+}
- 
- /*
-  * This can be used from process context to figure out what the value of
-@@ -296,10 +305,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
-  */
- static inline unsigned long __get_current_cr3_fast(void)
- {
--      unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
--
--      if (static_cpu_has(X86_FEATURE_PCID))
--              cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
-+      unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
-+              this_cpu_read(cpu_tlbstate.loaded_mm_asid));
- 
-       /* For now, be very restrictive about when this can be called. */
-       VM_WARN_ON(in_nmi() || preemptible());
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index 57943b4d8f2e..440400316c8a 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -123,7 +123,23 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-        * without going through leave_mm() / switch_mm_irqs_off() or that
-        * does something like write_cr3(read_cr3_pa()).
-        */
--      VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
-+#ifdef CONFIG_DEBUG_VM
-+      if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
-+              /*
-+               * If we were to BUG here, we'd be very likely to kill
-+               * the system so hard that we don't see the call trace.
-+               * Try to recover instead by ignoring the error and doing
-+               * a global flush to minimize the chance of corruption.
-+               *
-+               * (This is far from being a fully correct recovery.
-+               *  Architecturally, the CPU could prefetch something
-+               *  back into an incorrect ASID slot and leave it there
-+               *  to cause trouble down the road.  It's better than
-+               *  nothing, though.)
-+               */
-+              __flush_tlb_all();
-+      }
-+#endif
- 
-       if (real_prev == next) {
-               VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
-@@ -153,7 +169,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-                        */
-                       this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
-                                      next_tlb_gen);
--                      write_cr3(__pa(next->pgd) | prev_asid);
-+                      write_cr3(build_cr3(next, prev_asid));
- 
-                       /*
-                        * This gets called via leave_mm() in the idle path
-@@ -204,12 +220,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-               if (need_flush) {
-                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
-                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
--                      write_cr3(__pa(next->pgd) | new_asid);
-+                      write_cr3(build_cr3(next, new_asid));
-                       trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
-                                       TLB_FLUSH_ALL);
-               } else {
-                       /* The new ASID is already up to date. */
--                      write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
-+                      write_cr3(build_cr3_noflush(next, new_asid));
-                       trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
-               }
- 
-@@ -221,6 +237,50 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-       switch_ldt(real_prev, next);
- }
- 
-+/*
-+ * Call this when reinitializing a CPU.  It fixes the following potential
-+ * problems:
-+ *
-+ * - The ASID changed from what cpu_tlbstate thinks it is (most likely
-+ *   because the CPU was taken down and came back up with CR3's PCID
-+ *   bits clear.  CPU hotplug can do this.
-+ *
-+ * - The TLB contains junk in slots corresponding to inactive ASIDs.
-+ *
-+ * - The CPU went so far out to lunch that it may have missed a TLB
-+ *   flush.
-+ */
-+void initialize_tlbstate_and_flush(void)
-+{
-+      int i;
-+      struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
-+      u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
-+      unsigned long cr3 = __read_cr3();
-+
-+      /* Assert that CR3 already references the right mm. */
-+      WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
-+
-+      /*
-+       * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
-+       * doesn't work like other CR4 bits because it can only be set from
-+       * long mode.)
-+       */
-+      WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
-+              !(cr4_read_shadow() & X86_CR4_PCIDE));
-+
-+      /* Force ASID 0 and force a TLB flush. */
-+      write_cr3(build_cr3(mm, 0));
-+
-+      /* Reinitialize tlbstate. */
-+      this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
-+      this_cpu_write(cpu_tlbstate.next_asid, 1);
-+      this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
-+      this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
-+
-+      for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
-+              this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
-+}
-+
- /*
-  * flush_tlb_func_common()'s memory ordering requirement is that any
-  * TLB fills that happen after we flush the TLB are ordered after we
--- 
-2.14.2
-
diff --git a/patches/kernel/0045-x86-mm-Implement-PCID-based-optimization-try-to-pres.patch b/patches/kernel/0045-x86-mm-Implement-PCID-based-optimization-try-to-pres.patch

new file mode 100644 (file)

index 0000000..0b9df59
--- /dev/null
+++ b/patches/kernel/0045-x86-mm-Implement-PCID-based-optimization-try-to-pres.patch
@@ -0,0 +1,340 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 24 Jul 2017 21:41:38 -0700
+Subject: [PATCH] x86/mm: Implement PCID based optimization: try to preserve
+ old TLB entries using PCID
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+PCID is a "process context ID" -- it's what other architectures call
+an address space ID.  Every non-global TLB entry is tagged with a
+PCID, only TLB entries that match the currently selected PCID are
+used, and we can switch PGDs without flushing the TLB.  x86's
+PCID is 12 bits.
+
+This is an unorthodox approach to using PCID.  x86's PCID is far too
+short to uniquely identify a process, and we can't even really
+uniquely identify a running process because there are monster
+systems with over 4096 CPUs.  To make matters worse, past attempts
+to use all 12 PCID bits have resulted in slowdowns instead of
+speedups.
+
+This patch uses PCID differently.  We use a PCID to identify a
+recently-used mm on a per-cpu basis.  An mm has no fixed PCID
+binding at all; instead, we give it a fresh PCID each time it's
+loaded except in cases where we want to preserve the TLB, in which
+case we reuse a recent value.
+
+Here are some benchmark results, done on a Skylake laptop at 2.3 GHz
+(turbo off, intel_pstate requesting max performance) under KVM with
+the guest using idle=poll (to avoid artifacts when bouncing between
+CPUs).  I haven't done any real statistics here -- I just ran them
+in a loop and picked the fastest results that didn't look like
+outliers.  Unpatched means commit a4eb8b993554, so all the
+bookkeeping overhead is gone.
+
+ping-pong between two mms on the same CPU using eventfd:
+
+  patched:         1.22µs
+  patched, nopcid: 1.33µs
+  unpatched:       1.34µs
+
+Same ping-pong, but now touch 512 pages (all zero-page to minimize
+cache misses) each iteration.  dTLB misses are measured by
+dtlb_load_misses.miss_causes_a_walk:
+
+  patched:         1.8µs  11M  dTLB misses
+  patched, nopcid: 6.2µs, 207M dTLB misses
+  unpatched:       6.1µs, 190M dTLB misses
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/9ee75f17a81770feed616358e6860d98a2a5b1e7.1500957502.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit 10af6235e0d327d42e1bad974385197817923dc1)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d833a976288cdcf7fb1dabb48ebf614ebf6a311c)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/mmu_context.h     |  3 ++
+ arch/x86/include/asm/processor-flags.h |  2 +
+ arch/x86/include/asm/tlbflush.h        | 18 +++++++-
+ arch/x86/mm/init.c                     |  1 +
+ arch/x86/mm/tlb.c                      | 84 +++++++++++++++++++++++++---------
+ 5 files changed, 85 insertions(+), 23 deletions(-)
+
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index d6b055b328f2..7ae318c340d9 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -298,6 +298,9 @@ static inline unsigned long __get_current_cr3_fast(void)
+ {
+       unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
+ 
++      if (static_cpu_has(X86_FEATURE_PCID))
++              cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
++
+       /* For now, be very restrictive about when this can be called. */
+       VM_WARN_ON(in_nmi() || preemptible());
+ 
+diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
+index 79aa2f98398d..791b60199aa4 100644
+--- a/arch/x86/include/asm/processor-flags.h
++++ b/arch/x86/include/asm/processor-flags.h
+@@ -35,6 +35,7 @@
+ /* Mask off the address space ID bits. */
+ #define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull
+ #define CR3_PCID_MASK 0xFFFull
++#define CR3_NOFLUSH (1UL << 63)
+ #else
+ /*
+  * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
+@@ -42,6 +43,7 @@
+  */
+ #define CR3_ADDR_MASK 0xFFFFFFFFull
+ #define CR3_PCID_MASK 0ull
++#define CR3_NOFLUSH 0
+ #endif
+ 
+ #endif /* _ASM_X86_PROCESSOR_FLAGS_H */
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 6397275008db..d23e61dc0640 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -82,6 +82,12 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+ #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
+ #endif
+ 
++/*
++ * 6 because 6 should be plenty and struct tlb_state will fit in
++ * two cache lines.
++ */
++#define TLB_NR_DYN_ASIDS 6
++
+ struct tlb_context {
+       u64 ctx_id;
+       u64 tlb_gen;
+@@ -95,6 +101,8 @@ struct tlb_state {
+        * mode even if we've already switched back to swapper_pg_dir.
+        */
+       struct mm_struct *loaded_mm;
++      u16 loaded_mm_asid;
++      u16 next_asid;
+ 
+       /*
+        * Access to this CR4 shadow and to H/W CR4 is protected by
+@@ -104,7 +112,8 @@ struct tlb_state {
+ 
+       /*
+        * This is a list of all contexts that might exist in the TLB.
+-       * Since we don't yet use PCID, there is only one context.
++       * There is one per ASID that we use, and the ASID (what the
++       * CPU calls PCID) is the index into ctxts.
+        *
+        * For each context, ctx_id indicates which mm the TLB's user
+        * entries came from.  As an invariant, the TLB will never
+@@ -114,8 +123,13 @@ struct tlb_state {
+        * To be clear, this means that it's legal for the TLB code to
+        * flush the TLB without updating tlb_gen.  This can happen
+        * (for now, at least) due to paravirt remote flushes.
++       *
++       * NB: context 0 is a bit special, since it's also used by
++       * various bits of init code.  This is fine -- code that
++       * isn't aware of PCID will end up harmlessly flushing
++       * context 0.
+        */
+-      struct tlb_context ctxs[1];
++      struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
+ };
+ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+ 
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index c86dc071bb10..af5c1ed21d43 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -849,6 +849,7 @@ void __init zone_sizes_init(void)
+ 
+ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+       .loaded_mm = &init_mm,
++      .next_asid = 1,
+       .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
+ };
+ EXPORT_SYMBOL_GPL(cpu_tlbstate);
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 0982c997d36f..57943b4d8f2e 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -30,6 +30,40 @@
+ 
+ atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
+ 
++static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
++                          u16 *new_asid, bool *need_flush)
++{
++      u16 asid;
++
++      if (!static_cpu_has(X86_FEATURE_PCID)) {
++              *new_asid = 0;
++              *need_flush = true;
++              return;
++      }
++
++      for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
++              if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
++                  next->context.ctx_id)
++                      continue;
++
++              *new_asid = asid;
++              *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
++                             next_tlb_gen);
++              return;
++      }
++
++      /*
++       * We don't currently own an ASID slot on this CPU.
++       * Allocate a slot.
++       */
++      *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
++      if (*new_asid >= TLB_NR_DYN_ASIDS) {
++              *new_asid = 0;
++              this_cpu_write(cpu_tlbstate.next_asid, 1);
++      }
++      *need_flush = true;
++}
++
+ void leave_mm(int cpu)
+ {
+       struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+@@ -66,6 +100,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+                       struct task_struct *tsk)
+ {
+       struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
++      u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+       unsigned cpu = smp_processor_id();
+       u64 next_tlb_gen;
+ 
+@@ -85,12 +120,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+       /*
+        * Verify that CR3 is what we think it is.  This will catch
+        * hypothetical buggy code that directly switches to swapper_pg_dir
+-       * without going through leave_mm() / switch_mm_irqs_off().
++       * without going through leave_mm() / switch_mm_irqs_off() or that
++       * does something like write_cr3(read_cr3_pa()).
+        */
+-      VM_BUG_ON(read_cr3_pa() != __pa(real_prev->pgd));
++      VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
+ 
+       if (real_prev == next) {
+-              VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
++              VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
+                         next->context.ctx_id);
+ 
+               if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
+@@ -107,16 +143,17 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+               cpumask_set_cpu(cpu, mm_cpumask(next));
+               next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+ 
+-              if (this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen) < next_tlb_gen) {
++              if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) <
++                  next_tlb_gen) {
+                       /*
+                        * Ideally, we'd have a flush_tlb() variant that
+                        * takes the known CR3 value as input.  This would
+                        * be faster on Xen PV and on hypothetical CPUs
+                        * on which INVPCID is fast.
+                        */
+-                      this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen,
++                      this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
+                                      next_tlb_gen);
+-                      write_cr3(__pa(next->pgd));
++                      write_cr3(__pa(next->pgd) | prev_asid);
+ 
+                       /*
+                        * This gets called via leave_mm() in the idle path
+@@ -134,8 +171,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+                * are not reflected in tlb_gen.)
+                */
+       } else {
+-              VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) ==
+-                        next->context.ctx_id);
++              u16 new_asid;
++              bool need_flush;
+ 
+               if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+                       /*
+@@ -162,18 +199,22 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+               cpumask_set_cpu(cpu, mm_cpumask(next));
+               next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+ 
+-              this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
+-              this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, next_tlb_gen);
+-              this_cpu_write(cpu_tlbstate.loaded_mm, next);
+-              write_cr3(__pa(next->pgd));
++              choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ 
+-              /*
+-               * This gets called via leave_mm() in the idle path where RCU
+-               * functions differently.  Tracing normally uses RCU, so we
+-               * have to call the tracepoint specially here.
+-               */
+-              trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
++              if (need_flush) {
++                      this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
++                      this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
++                      write_cr3(__pa(next->pgd) | new_asid);
++                      trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
+                                       TLB_FLUSH_ALL);
++              } else {
++                      /* The new ASID is already up to date. */
++                      write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
++                      trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
++              }
++
++              this_cpu_write(cpu_tlbstate.loaded_mm, next);
++              this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
+       }
+ 
+       load_mm_cr4(next);
+@@ -200,13 +241,14 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
+        *                   wants us to catch up to.
+        */
+       struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
++      u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+       u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
+-      u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen);
++      u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
+ 
+       /* This code cannot presently handle being reentered. */
+       VM_WARN_ON(!irqs_disabled());
+ 
+-      VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
++      VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
+                  loaded_mm->context.ctx_id);
+ 
+       if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
+@@ -294,7 +336,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
+       }
+ 
+       /* Both paths above update our state to mm_tlb_gen. */
+-      this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, mm_tlb_gen);
++      this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
+ }
+ 
+ static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0046-x86-mm-64-Stop-using-CR3.PCID-0-in-ASID-aware-code.patch b/patches/kernel/0046-x86-mm-64-Stop-using-CR3.PCID-0-in-ASID-aware-code.patch

deleted file mode 100644 (file)

index 6ce824b..0000000
--- a/patches/kernel/0046-x86-mm-64-Stop-using-CR3.PCID-0-in-ASID-aware-code.patch
+++ /dev/null
@@ -1,85 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sun, 17 Sep 2017 09:03:49 -0700
-Subject: [PATCH] x86/mm/64: Stop using CR3.PCID == 0 in ASID-aware code
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Putting the logical ASID into CR3's PCID bits directly means that we
-have two cases to consider separately: ASID == 0 and ASID != 0.
-This means that bugs that only hit in one of these cases trigger
-nondeterministically.
-
-There were some bugs like this in the past, and I think there's
-still one in current kernels.  In particular, we have a number of
-ASID-unware code paths that save CR3, write some special value, and
-then restore CR3.  This includes suspend/resume, hibernate, kexec,
-EFI, and maybe other things I've missed.  This is currently
-dangerous: if ASID != 0, then this code sequence will leave garbage
-in the TLB tagged for ASID 0.  We could potentially see corruption
-when switching back to ASID 0.  In principle, an
-initialize_tlbstate_and_flush() call after these sequences would
-solve the problem, but EFI, at least, does not call this.  (And it
-probably shouldn't -- initialize_tlbstate_and_flush() is rather
-expensive.)
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/cdc14bbe5d3c3ef2a562be09a6368ffe9bd947a6.1505663533.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 52a2af400c1075219b3f0ce5c96fc961da44018a)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 15e474753e66e44da1365049f465427053a453ba)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/mmu_context.h | 21 +++++++++++++++++++--
- 1 file changed, 19 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
-index a999ba6b721f..c120b5db178a 100644
---- a/arch/x86/include/asm/mmu_context.h
-+++ b/arch/x86/include/asm/mmu_context.h
-@@ -286,14 +286,31 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
-       return __pkru_allows_pkey(vma_pkey(vma), write);
- }
- 
-+/*
-+ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
-+ * bits.  This serves two purposes.  It prevents a nasty situation in
-+ * which PCID-unaware code saves CR3, loads some other value (with PCID
-+ * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
-+ * the saved ASID was nonzero.  It also means that any bugs involving
-+ * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
-+ * deterministically.
-+ */
-+
- static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
- {
--      return __sme_pa(mm->pgd) | asid;
-+      if (static_cpu_has(X86_FEATURE_PCID)) {
-+              VM_WARN_ON_ONCE(asid > 4094);
-+              return __sme_pa(mm->pgd) | (asid + 1);
-+      } else {
-+              VM_WARN_ON_ONCE(asid != 0);
-+              return __sme_pa(mm->pgd);
-+      }
- }
- 
- static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
- {
--      return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
-+      VM_WARN_ON_ONCE(asid > 4094);
-+      return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
- }
- 
- /*
--- 
-2.14.2
-
diff --git a/patches/kernel/0046-x86-mm-Factor-out-CR3-building-code.patch b/patches/kernel/0046-x86-mm-Factor-out-CR3-building-code.patch

new file mode 100644 (file)

index 0000000..01f7292
--- /dev/null
+++ b/patches/kernel/0046-x86-mm-Factor-out-CR3-building-code.patch
@@ -0,0 +1,176 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 17 Sep 2017 09:03:48 -0700
+Subject: [PATCH] x86/mm: Factor out CR3-building code
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Currently, the code that assembles a value to load into CR3 is
+open-coded everywhere.  Factor it out into helpers build_cr3() and
+build_cr3_noflush().
+
+This makes one semantic change: __get_current_cr3_fast() was wrong
+on SME systems.  No one noticed because the only caller is in the
+VMX code, and there are no CPUs with both SME and VMX.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tom Lendacky <Thomas.Lendacky@amd.com>
+Link: http://lkml.kernel.org/r/ce350cf11e93e2842d14d0b95b0199c7d881f527.1505663533.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit 47061a24e2ee5bd8a40d473d47a5bd823fa0081f)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 72be211bac7be521f128d419d63cae38ba60ace8)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/mmu_context.h | 15 ++++++---
+ arch/x86/mm/tlb.c                  | 68 +++++++++++++++++++++++++++++++++++---
+ 2 files changed, 75 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index 7ae318c340d9..a999ba6b721f 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -286,6 +286,15 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
+       return __pkru_allows_pkey(vma_pkey(vma), write);
+ }
+ 
++static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
++{
++      return __sme_pa(mm->pgd) | asid;
++}
++
++static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
++{
++      return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
++}
+ 
+ /*
+  * This can be used from process context to figure out what the value of
+@@ -296,10 +305,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
+  */
+ static inline unsigned long __get_current_cr3_fast(void)
+ {
+-      unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
+-
+-      if (static_cpu_has(X86_FEATURE_PCID))
+-              cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
++      unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
++              this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+ 
+       /* For now, be very restrictive about when this can be called. */
+       VM_WARN_ON(in_nmi() || preemptible());
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 57943b4d8f2e..440400316c8a 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -123,7 +123,23 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+        * without going through leave_mm() / switch_mm_irqs_off() or that
+        * does something like write_cr3(read_cr3_pa()).
+        */
+-      VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
++#ifdef CONFIG_DEBUG_VM
++      if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
++              /*
++               * If we were to BUG here, we'd be very likely to kill
++               * the system so hard that we don't see the call trace.
++               * Try to recover instead by ignoring the error and doing
++               * a global flush to minimize the chance of corruption.
++               *
++               * (This is far from being a fully correct recovery.
++               *  Architecturally, the CPU could prefetch something
++               *  back into an incorrect ASID slot and leave it there
++               *  to cause trouble down the road.  It's better than
++               *  nothing, though.)
++               */
++              __flush_tlb_all();
++      }
++#endif
+ 
+       if (real_prev == next) {
+               VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
+@@ -153,7 +169,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+                        */
+                       this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
+                                      next_tlb_gen);
+-                      write_cr3(__pa(next->pgd) | prev_asid);
++                      write_cr3(build_cr3(next, prev_asid));
+ 
+                       /*
+                        * This gets called via leave_mm() in the idle path
+@@ -204,12 +220,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+               if (need_flush) {
+                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
+                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
+-                      write_cr3(__pa(next->pgd) | new_asid);
++                      write_cr3(build_cr3(next, new_asid));
+                       trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
+                                       TLB_FLUSH_ALL);
+               } else {
+                       /* The new ASID is already up to date. */
+-                      write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
++                      write_cr3(build_cr3_noflush(next, new_asid));
+                       trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
+               }
+ 
+@@ -221,6 +237,50 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+       switch_ldt(real_prev, next);
+ }
+ 
++/*
++ * Call this when reinitializing a CPU.  It fixes the following potential
++ * problems:
++ *
++ * - The ASID changed from what cpu_tlbstate thinks it is (most likely
++ *   because the CPU was taken down and came back up with CR3's PCID
++ *   bits clear.  CPU hotplug can do this.
++ *
++ * - The TLB contains junk in slots corresponding to inactive ASIDs.
++ *
++ * - The CPU went so far out to lunch that it may have missed a TLB
++ *   flush.
++ */
++void initialize_tlbstate_and_flush(void)
++{
++      int i;
++      struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
++      u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
++      unsigned long cr3 = __read_cr3();
++
++      /* Assert that CR3 already references the right mm. */
++      WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
++
++      /*
++       * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
++       * doesn't work like other CR4 bits because it can only be set from
++       * long mode.)
++       */
++      WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
++              !(cr4_read_shadow() & X86_CR4_PCIDE));
++
++      /* Force ASID 0 and force a TLB flush. */
++      write_cr3(build_cr3(mm, 0));
++
++      /* Reinitialize tlbstate. */
++      this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
++      this_cpu_write(cpu_tlbstate.next_asid, 1);
++      this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
++      this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
++
++      for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
++              this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
++}
++
+ /*
+  * flush_tlb_func_common()'s memory ordering requirement is that any
+  * TLB fills that happen after we flush the TLB are ordered after we
+-- 
+2.14.2
+
diff --git a/patches/kernel/0047-x86-mm-64-Stop-using-CR3.PCID-0-in-ASID-aware-code.patch b/patches/kernel/0047-x86-mm-64-Stop-using-CR3.PCID-0-in-ASID-aware-code.patch

new file mode 100644 (file)

index 0000000..6ce824b
--- /dev/null
+++ b/patches/kernel/0047-x86-mm-64-Stop-using-CR3.PCID-0-in-ASID-aware-code.patch
@@ -0,0 +1,85 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 17 Sep 2017 09:03:49 -0700
+Subject: [PATCH] x86/mm/64: Stop using CR3.PCID == 0 in ASID-aware code
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Putting the logical ASID into CR3's PCID bits directly means that we
+have two cases to consider separately: ASID == 0 and ASID != 0.
+This means that bugs that only hit in one of these cases trigger
+nondeterministically.
+
+There were some bugs like this in the past, and I think there's
+still one in current kernels.  In particular, we have a number of
+ASID-unaware code paths that save CR3, write some special value, and
+then restore CR3.  This includes suspend/resume, hibernate, kexec,
+EFI, and maybe other things I've missed.  This is currently
+dangerous: if ASID != 0, then this code sequence will leave garbage
+in the TLB tagged for ASID 0.  We could potentially see corruption
+when switching back to ASID 0.  In principle, an
+initialize_tlbstate_and_flush() call after these sequences would
+solve the problem, but EFI, at least, does not call this.  (And it
+probably shouldn't -- initialize_tlbstate_and_flush() is rather
+expensive.)
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/cdc14bbe5d3c3ef2a562be09a6368ffe9bd947a6.1505663533.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 52a2af400c1075219b3f0ce5c96fc961da44018a)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 15e474753e66e44da1365049f465427053a453ba)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/mmu_context.h | 21 +++++++++++++++++++--
+ 1 file changed, 19 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index a999ba6b721f..c120b5db178a 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -286,14 +286,31 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
+       return __pkru_allows_pkey(vma_pkey(vma), write);
+ }
+ 
++/*
++ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
++ * bits.  This serves two purposes.  It prevents a nasty situation in
++ * which PCID-unaware code saves CR3, loads some other value (with PCID
++ * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
++ * the saved ASID was nonzero.  It also means that any bugs involving
++ * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
++ * deterministically.
++ */
++
+ static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+ {
+-      return __sme_pa(mm->pgd) | asid;
++      if (static_cpu_has(X86_FEATURE_PCID)) {
++              VM_WARN_ON_ONCE(asid > 4094);
++              return __sme_pa(mm->pgd) | (asid + 1);
++      } else {
++              VM_WARN_ON_ONCE(asid != 0);
++              return __sme_pa(mm->pgd);
++      }
+ }
+ 
+ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+ {
+-      return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
++      VM_WARN_ON_ONCE(asid > 4094);
++      return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
+ }
+ 
+ /*
+-- 
+2.14.2
+
diff --git a/patches/kernel/0047-x86-mm-Flush-more-aggressively-in-lazy-TLB-mode.patch b/patches/kernel/0047-x86-mm-Flush-more-aggressively-in-lazy-TLB-mode.patch

deleted file mode 100644 (file)

index 62c8c07..0000000
--- a/patches/kernel/0047-x86-mm-Flush-more-aggressively-in-lazy-TLB-mode.patch
+++ /dev/null
@@ -1,401 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 9 Oct 2017 09:50:49 -0700
-Subject: [PATCH] x86/mm: Flush more aggressively in lazy TLB mode
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Since commit:
-
-  94b1b03b519b ("x86/mm: Rework lazy TLB mode and TLB freshness tracking")
-
-x86's lazy TLB mode has been all the way lazy: when running a kernel thread
-(including the idle thread), the kernel keeps using the last user mm's
-page tables without attempting to maintain user TLB coherence at all.
-
-From a pure semantic perspective, this is fine -- kernel threads won't
-attempt to access user pages, so having stale TLB entries doesn't matter.
-
-Unfortunately, I forgot about a subtlety.  By skipping TLB flushes,
-we also allow any paging-structure caches that may exist on the CPU
-to become incoherent.  This means that we can have a
-paging-structure cache entry that references a freed page table, and
-the CPU is within its rights to do a speculative page walk starting
-at the freed page table.
-
-I can imagine this causing two different problems:
-
- - A speculative page walk starting from a bogus page table could read
-   IO addresses.  I haven't seen any reports of this causing problems.
-
- - A speculative page walk that involves a bogus page table can install
-   garbage in the TLB.  Such garbage would always be at a user VA, but
-   some AMD CPUs have logic that triggers a machine check when it notices
-   these bogus entries.  I've seen a couple reports of this.
-
-Boris further explains the failure mode:
-
-> It is actually more of an optimization which assumes that paging-structure
-> entries are in WB DRAM:
->
-> "TlbCacheDis: cacheable memory disable. Read-write. 0=Enables
-> performance optimization that assumes PML4, PDP, PDE, and PTE entries
-> are in cacheable WB-DRAM; memory type checks may be bypassed, and
-> addresses outside of WB-DRAM may result in undefined behavior or NB
-> protocol errors. 1=Disables performance optimization and allows PML4,
-> PDP, PDE and PTE entries to be in any memory type. Operating systems
-> that maintain page tables in memory types other than WB- DRAM must set
-> TlbCacheDis to insure proper operation."
->
-> The MCE generated is an NB protocol error to signal that
->
-> "Link: A specific coherent-only packet from a CPU was issued to an
-> IO link. This may be caused by software which addresses page table
-> structures in a memory type other than cacheable WB-DRAM without
-> properly configuring MSRC001_0015[TlbCacheDis]. This may occur, for
-> example, when page table structure addresses are above top of memory. In
-> such cases, the NB will generate an MCE if it sees a mismatch between
-> the memory operation generated by the core and the link type."
->
-> I'm assuming coherent-only packets don't go out on IO links, thus the
-> error.
-
-To fix this, reinstate TLB coherence in lazy mode.  With this patch
-applied, we do it in one of two ways:
-
- - If we have PCID, we simply switch back to init_mm's page tables
-   when we enter a kernel thread -- this seems to be quite cheap
-   except for the cost of serializing the CPU.
-
- - If we don't have PCID, then we set a flag and switch to init_mm
-   the first time we would otherwise need to flush the TLB.
-
-The /sys/kernel/debug/x86/tlb_use_lazy_mode debug switch can be changed
-to override the default mode for benchmarking.
-
-In theory, we could optimize this better by only flushing the TLB in
-lazy CPUs when a page table is freed.  Doing that would require
-auditing the mm code to make sure that all page table freeing goes
-through tlb_remove_page() as well as reworking some data structures
-to implement the improved flush logic.
-
-Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
-Reported-by: Adam Borowski <kilobyte@angband.pl>
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Borislav Petkov <bp@suse.de>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Daniel Borkmann <daniel@iogearbox.net>
-Cc: Eric Biggers <ebiggers@google.com>
-Cc: Johannes Hirte <johannes.hirte@datenkhaos.de>
-Cc: Kees Cook <keescook@chromium.org>
-Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Nadav Amit <nadav.amit@gmail.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Roman Kagan <rkagan@virtuozzo.com>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Fixes: 94b1b03b519b ("x86/mm: Rework lazy TLB mode and TLB freshness tracking")
-Link: http://lkml.kernel.org/r/20171009170231.fkpraqokz6e4zeco@pd.tnic
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit b956575bed91ecfb136a8300742ecbbf451471ab)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit a4bb9409c548ece51ec246fc5113a32b8d130142)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/mmu_context.h |   8 +-
- arch/x86/include/asm/tlbflush.h    |  24 ++++++
- arch/x86/mm/tlb.c                  | 160 +++++++++++++++++++++++++------------
- 3 files changed, 136 insertions(+), 56 deletions(-)
-
-diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
-index c120b5db178a..3c856a15b98e 100644
---- a/arch/x86/include/asm/mmu_context.h
-+++ b/arch/x86/include/asm/mmu_context.h
-@@ -126,13 +126,7 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
-       DEBUG_LOCKS_WARN_ON(preemptible());
- }
- 
--static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
--{
--      int cpu = smp_processor_id();
--
--      if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
--              cpumask_clear_cpu(cpu, mm_cpumask(mm));
--}
-+void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
- 
- static inline int init_new_context(struct task_struct *tsk,
-                                  struct mm_struct *mm)
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index d23e61dc0640..6533da3036c9 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -82,6 +82,13 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
- #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
- #endif
- 
-+/*
-+ * If tlb_use_lazy_mode is true, then we try to avoid switching CR3 to point
-+ * to init_mm when we switch to a kernel thread (e.g. the idle thread).  If
-+ * it's false, then we immediately switch CR3 when entering a kernel thread.
-+ */
-+DECLARE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
-+
- /*
-  * 6 because 6 should be plenty and struct tlb_state will fit in
-  * two cache lines.
-@@ -104,6 +111,23 @@ struct tlb_state {
-       u16 loaded_mm_asid;
-       u16 next_asid;
- 
-+      /*
-+       * We can be in one of several states:
-+       *
-+       *  - Actively using an mm.  Our CPU's bit will be set in
-+       *    mm_cpumask(loaded_mm) and is_lazy == false;
-+       *
-+       *  - Not using a real mm.  loaded_mm == &init_mm.  Our CPU's bit
-+       *    will not be set in mm_cpumask(&init_mm) and is_lazy == false.
-+       *
-+       *  - Lazily using a real mm.  loaded_mm != &init_mm, our bit
-+       *    is set in mm_cpumask(loaded_mm), but is_lazy == true.
-+       *    We're heuristically guessing that the CR3 load we
-+       *    skipped more than makes up for the overhead added by
-+       *    lazy mode.
-+       */
-+      bool is_lazy;
-+
-       /*
-        * Access to this CR4 shadow and to H/W CR4 is protected by
-        * disabling interrupts when modifying either one.
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index 440400316c8a..b27aceaf7ed1 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -30,6 +30,8 @@
- 
- atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
- 
-+DEFINE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
-+
- static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
-                           u16 *new_asid, bool *need_flush)
- {
-@@ -80,7 +82,7 @@ void leave_mm(int cpu)
-               return;
- 
-       /* Warn if we're not lazy. */
--      WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));
-+      WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));
- 
-       switch_mm(NULL, &init_mm, NULL);
- }
-@@ -140,52 +142,24 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-               __flush_tlb_all();
-       }
- #endif
-+      this_cpu_write(cpu_tlbstate.is_lazy, false);
- 
-       if (real_prev == next) {
-               VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
-                         next->context.ctx_id);
- 
--              if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
--                      /*
--                       * There's nothing to do: we weren't lazy, and we
--                       * aren't changing our mm.  We don't need to flush
--                       * anything, nor do we need to update CR3, CR4, or
--                       * LDTR.
--                       */
--                      return;
--              }
--
--              /* Resume remote flushes and then read tlb_gen. */
--              cpumask_set_cpu(cpu, mm_cpumask(next));
--              next_tlb_gen = atomic64_read(&next->context.tlb_gen);
--
--              if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) <
--                  next_tlb_gen) {
--                      /*
--                       * Ideally, we'd have a flush_tlb() variant that
--                       * takes the known CR3 value as input.  This would
--                       * be faster on Xen PV and on hypothetical CPUs
--                       * on which INVPCID is fast.
--                       */
--                      this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
--                                     next_tlb_gen);
--                      write_cr3(build_cr3(next, prev_asid));
--
--                      /*
--                       * This gets called via leave_mm() in the idle path
--                       * where RCU functions differently.  Tracing normally
--                       * uses RCU, so we have to call the tracepoint
--                       * specially here.
--                       */
--                      trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
--                                              TLB_FLUSH_ALL);
--              }
--
-               /*
--               * We just exited lazy mode, which means that CR4 and/or LDTR
--               * may be stale.  (Changes to the required CR4 and LDTR states
--               * are not reflected in tlb_gen.)
-+               * We don't currently support having a real mm loaded without
-+               * our cpu set in mm_cpumask().  We have all the bookkeeping
-+               * in place to figure out whether we would need to flush
-+               * if our cpu were cleared in mm_cpumask(), but we don't
-+               * currently use it.
-                */
-+              if (WARN_ON_ONCE(real_prev != &init_mm &&
-+                               !cpumask_test_cpu(cpu, mm_cpumask(next))))
-+                      cpumask_set_cpu(cpu, mm_cpumask(next));
-+
-+              return;
-       } else {
-               u16 new_asid;
-               bool need_flush;
-@@ -204,10 +178,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-               }
- 
-               /* Stop remote flushes for the previous mm */
--              if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
--                      cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
--
--              VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
-+              VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
-+                              real_prev != &init_mm);
-+              cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
- 
-               /*
-                * Start remote flushes and then read tlb_gen.
-@@ -237,6 +210,37 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-       switch_ldt(real_prev, next);
- }
- 
-+/*
-+ * enter_lazy_tlb() is a hint from the scheduler that we are entering a
-+ * kernel thread or other context without an mm.  Acceptable implementations
-+ * include doing nothing whatsoever, switching to init_mm, or various clever
-+ * lazy tricks to try to minimize TLB flushes.
-+ *
-+ * The scheduler reserves the right to call enter_lazy_tlb() several times
-+ * in a row.  It will notify us that we're going back to a real mm by
-+ * calling switch_mm_irqs_off().
-+ */
-+void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-+{
-+      if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
-+              return;
-+
-+      if (static_branch_unlikely(&tlb_use_lazy_mode)) {
-+              /*
-+               * There's a significant optimization that may be possible
-+               * here.  We have accurate enough TLB flush tracking that we
-+               * don't need to maintain coherence of TLB per se when we're
-+               * lazy.  We do, however, need to maintain coherence of
-+               * paging-structure caches.  We could, in principle, leave our
-+               * old mm loaded and only switch to init_mm when
-+               * tlb_remove_page() happens.
-+               */
-+              this_cpu_write(cpu_tlbstate.is_lazy, true);
-+      } else {
-+              switch_mm(NULL, &init_mm, NULL);
-+      }
-+}
-+
- /*
-  * Call this when reinitializing a CPU.  It fixes the following potential
-  * problems:
-@@ -308,16 +312,20 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
-       /* This code cannot presently handle being reentered. */
-       VM_WARN_ON(!irqs_disabled());
- 
-+      if (unlikely(loaded_mm == &init_mm))
-+              return;
-+
-       VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
-                  loaded_mm->context.ctx_id);
- 
--      if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
-+      if (this_cpu_read(cpu_tlbstate.is_lazy)) {
-               /*
--               * We're in lazy mode -- don't flush.  We can get here on
--               * remote flushes due to races and on local flushes if a
--               * kernel thread coincidentally flushes the mm it's lazily
--               * still using.
-+               * We're in lazy mode.  We need to at least flush our
-+               * paging-structure cache to avoid speculatively reading
-+               * garbage into our TLB.  Since switching to init_mm is barely
-+               * slower than a minimal flush, just switch to init_mm.
-                */
-+              switch_mm_irqs_off(NULL, &init_mm, NULL);
-               return;
-       }
- 
-@@ -616,3 +624,57 @@ static int __init create_tlb_single_page_flush_ceiling(void)
-       return 0;
- }
- late_initcall(create_tlb_single_page_flush_ceiling);
-+
-+static ssize_t tlblazy_read_file(struct file *file, char __user *user_buf,
-+                               size_t count, loff_t *ppos)
-+{
-+      char buf[2];
-+
-+      buf[0] = static_branch_likely(&tlb_use_lazy_mode) ? '1' : '0';
-+      buf[1] = '\n';
-+
-+      return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
-+}
-+
-+static ssize_t tlblazy_write_file(struct file *file,
-+               const char __user *user_buf, size_t count, loff_t *ppos)
-+{
-+      bool val;
-+
-+      if (kstrtobool_from_user(user_buf, count, &val))
-+              return -EINVAL;
-+
-+      if (val)
-+              static_branch_enable(&tlb_use_lazy_mode);
-+      else
-+              static_branch_disable(&tlb_use_lazy_mode);
-+
-+      return count;
-+}
-+
-+static const struct file_operations fops_tlblazy = {
-+      .read = tlblazy_read_file,
-+      .write = tlblazy_write_file,
-+      .llseek = default_llseek,
-+};
-+
-+static int __init init_tlb_use_lazy_mode(void)
-+{
-+      if (boot_cpu_has(X86_FEATURE_PCID)) {
-+              /*
-+               * Heuristic: with PCID on, switching to and from
-+               * init_mm is reasonably fast, but remote flush IPIs
-+               * as expensive as ever, so turn off lazy TLB mode.
-+               *
-+               * We can't do this in setup_pcid() because static keys
-+               * haven't been initialized yet, and it would blow up
-+               * badly.
-+               */
-+              static_branch_disable(&tlb_use_lazy_mode);
-+      }
-+
-+      debugfs_create_file("tlb_use_lazy_mode", S_IRUSR | S_IWUSR,
-+                          arch_debugfs_dir, NULL, &fops_tlblazy);
-+      return 0;
-+}
-+late_initcall(init_tlb_use_lazy_mode);
--- 
-2.14.2
-
diff --git a/patches/kernel/0048-Revert-x86-mm-Stop-calling-leave_mm-in-idle-code.patch b/patches/kernel/0048-Revert-x86-mm-Stop-calling-leave_mm-in-idle-code.patch

deleted file mode 100644 (file)

index ffd56ee..0000000
--- a/patches/kernel/0048-Revert-x86-mm-Stop-calling-leave_mm-in-idle-code.patch
+++ /dev/null
@@ -1,101 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sat, 4 Nov 2017 04:16:12 -0700
-Subject: [PATCH] Revert "x86/mm: Stop calling leave_mm() in idle code"
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This reverts commit 43858b4f25cf0adc5c2ca9cf5ce5fdf2532941e5.
-
-The reason I removed the leave_mm() calls in question is because the
-heuristic wasn't needed after that patch.  With the original version
-of my PCID series, we never flushed a "lazy cpu" (i.e. a CPU running
-kernel thread) due a flush on the loaded mm.
-
-Unfortunately, that caused architectural issues, so now I've
-reinstated these flushes on non-PCID systems in:
-
-    commit b956575bed91 ("x86/mm: Flush more aggressively in lazy TLB mode").
-
-That, in turn, gives us a power management and occasionally
-performance regression as compared to old kernels: a process that
-goes into a deep idle state on a given CPU and gets its mm flushed
-due to activity on a different CPU will wake the idle CPU.
-
-Reinstate the old ugly heuristic: if a CPU goes into ACPI C3 or an
-intel_idle state that is likely to cause a TLB flush gets its mm
-switched to init_mm before going idle.
-
-FWIW, this heuristic is lousy.  Whether we should change CR3 before
-idle isn't a good hint except insofar as the performance hit is a bit
-lower if the TLB is getting flushed by the idle code anyway.  What we
-really want to know is whether we anticipate being idle long enough
-that the mm is likely to be flushed before we wake up.  This is more a
-matter of the expected latency than the idle state that gets chosen.
-This heuristic also completely fails on systems that don't know
-whether the TLB will be flushed (e.g. AMD systems?).  OTOH it may be a
-bit obsolete anyway -- PCID systems don't presently benefit from this
-heuristic at all.
-
-We also shouldn't do this callback from innermost bit of the idle code
-due to the RCU nastiness it causes.  All the information need is
-available before rcu_idle_enter() needs to happen.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Fixes: 43858b4f25cf "x86/mm: Stop calling leave_mm() in idle code"
-Link: http://lkml.kernel.org/r/c513bbd4e653747213e05bc7062de000bf0202a5.1509793738.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 675357362aeba19688440eb1aaa7991067f73b12)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit b607843145fd0593fcd87e2596d1dc5a1d5f79a5)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/tlb.c | 16 +++++++++++++---
- 1 file changed, 13 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index b27aceaf7ed1..ed06f1593390 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -194,12 +194,22 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
-                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-                       write_cr3(build_cr3(next, new_asid));
--                      trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
--                                      TLB_FLUSH_ALL);
-+
-+                      /*
-+                       * NB: This gets called via leave_mm() in the idle path
-+                       * where RCU functions differently.  Tracing normally
-+                       * uses RCU, so we need to use the _rcuidle variant.
-+                       *
-+                       * (There is no good reason for this.  The idle code should
-+                       *  be rearranged to call this before rcu_idle_enter().)
-+                       */
-+                      trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-               } else {
-                       /* The new ASID is already up to date. */
-                       write_cr3(build_cr3_noflush(next, new_asid));
--                      trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
-+
-+                      /* See above wrt _rcuidle. */
-+                      trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
-               }
- 
-               this_cpu_write(cpu_tlbstate.loaded_mm, next);
--- 
-2.14.2
-
diff --git a/patches/kernel/0048-x86-mm-Flush-more-aggressively-in-lazy-TLB-mode.patch b/patches/kernel/0048-x86-mm-Flush-more-aggressively-in-lazy-TLB-mode.patch

new file mode 100644 (file)

index 0000000..62c8c07
--- /dev/null
+++ b/patches/kernel/0048-x86-mm-Flush-more-aggressively-in-lazy-TLB-mode.patch
@@ -0,0 +1,401 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 9 Oct 2017 09:50:49 -0700
+Subject: [PATCH] x86/mm: Flush more aggressively in lazy TLB mode
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Since commit:
+
+  94b1b03b519b ("x86/mm: Rework lazy TLB mode and TLB freshness tracking")
+
+x86's lazy TLB mode has been all the way lazy: when running a kernel thread
+(including the idle thread), the kernel keeps using the last user mm's
+page tables without attempting to maintain user TLB coherence at all.
+
+From a pure semantic perspective, this is fine -- kernel threads won't
+attempt to access user pages, so having stale TLB entries doesn't matter.
+
+Unfortunately, I forgot about a subtlety.  By skipping TLB flushes,
+we also allow any paging-structure caches that may exist on the CPU
+to become incoherent.  This means that we can have a
+paging-structure cache entry that references a freed page table, and
+the CPU is within its rights to do a speculative page walk starting
+at the freed page table.
+
+I can imagine this causing two different problems:
+
+ - A speculative page walk starting from a bogus page table could read
+   IO addresses.  I haven't seen any reports of this causing problems.
+
+ - A speculative page walk that involves a bogus page table can install
+   garbage in the TLB.  Such garbage would always be at a user VA, but
+   some AMD CPUs have logic that triggers a machine check when it notices
+   these bogus entries.  I've seen a couple reports of this.
+
+Boris further explains the failure mode:
+
+> It is actually more of an optimization which assumes that paging-structure
+> entries are in WB DRAM:
+>
+> "TlbCacheDis: cacheable memory disable. Read-write. 0=Enables
+> performance optimization that assumes PML4, PDP, PDE, and PTE entries
+> are in cacheable WB-DRAM; memory type checks may be bypassed, and
+> addresses outside of WB-DRAM may result in undefined behavior or NB
+> protocol errors. 1=Disables performance optimization and allows PML4,
+> PDP, PDE and PTE entries to be in any memory type. Operating systems
+> that maintain page tables in memory types other than WB- DRAM must set
+> TlbCacheDis to insure proper operation."
+>
+> The MCE generated is an NB protocol error to signal that
+>
+> "Link: A specific coherent-only packet from a CPU was issued to an
+> IO link. This may be caused by software which addresses page table
+> structures in a memory type other than cacheable WB-DRAM without
+> properly configuring MSRC001_0015[TlbCacheDis]. This may occur, for
+> example, when page table structure addresses are above top of memory. In
+> such cases, the NB will generate an MCE if it sees a mismatch between
+> the memory operation generated by the core and the link type."
+>
+> I'm assuming coherent-only packets don't go out on IO links, thus the
+> error.
+
+To fix this, reinstate TLB coherence in lazy mode.  With this patch
+applied, we do it in one of two ways:
+
+ - If we have PCID, we simply switch back to init_mm's page tables
+   when we enter a kernel thread -- this seems to be quite cheap
+   except for the cost of serializing the CPU.
+
+ - If we don't have PCID, then we set a flag and switch to init_mm
+   the first time we would otherwise need to flush the TLB.
+
+The /sys/kernel/debug/x86/tlb_use_lazy_mode debug switch can be changed
+to override the default mode for benchmarking.
+
+In theory, we could optimize this better by only flushing the TLB in
+lazy CPUs when a page table is freed.  Doing that would require
+auditing the mm code to make sure that all page table freeing goes
+through tlb_remove_page() as well as reworking some data structures
+to implement the improved flush logic.
+
+Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
+Reported-by: Adam Borowski <kilobyte@angband.pl>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Daniel Borkmann <daniel@iogearbox.net>
+Cc: Eric Biggers <ebiggers@google.com>
+Cc: Johannes Hirte <johannes.hirte@datenkhaos.de>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Roman Kagan <rkagan@virtuozzo.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Fixes: 94b1b03b519b ("x86/mm: Rework lazy TLB mode and TLB freshness tracking")
+Link: http://lkml.kernel.org/r/20171009170231.fkpraqokz6e4zeco@pd.tnic
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit b956575bed91ecfb136a8300742ecbbf451471ab)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit a4bb9409c548ece51ec246fc5113a32b8d130142)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/mmu_context.h |   8 +-
+ arch/x86/include/asm/tlbflush.h    |  24 ++++++
+ arch/x86/mm/tlb.c                  | 160 +++++++++++++++++++++++++------------
+ 3 files changed, 136 insertions(+), 56 deletions(-)
+
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index c120b5db178a..3c856a15b98e 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -126,13 +126,7 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
+       DEBUG_LOCKS_WARN_ON(preemptible());
+ }
+ 
+-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+-{
+-      int cpu = smp_processor_id();
+-
+-      if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
+-              cpumask_clear_cpu(cpu, mm_cpumask(mm));
+-}
++void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+ 
+ static inline int init_new_context(struct task_struct *tsk,
+                                  struct mm_struct *mm)
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index d23e61dc0640..6533da3036c9 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -82,6 +82,13 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+ #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
+ #endif
+ 
++/*
++ * If tlb_use_lazy_mode is true, then we try to avoid switching CR3 to point
++ * to init_mm when we switch to a kernel thread (e.g. the idle thread).  If
++ * it's false, then we immediately switch CR3 when entering a kernel thread.
++ */
++DECLARE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
++
+ /*
+  * 6 because 6 should be plenty and struct tlb_state will fit in
+  * two cache lines.
+@@ -104,6 +111,23 @@ struct tlb_state {
+       u16 loaded_mm_asid;
+       u16 next_asid;
+ 
++      /*
++       * We can be in one of several states:
++       *
++       *  - Actively using an mm.  Our CPU's bit will be set in
++       *    mm_cpumask(loaded_mm) and is_lazy == false;
++       *
++       *  - Not using a real mm.  loaded_mm == &init_mm.  Our CPU's bit
++       *    will not be set in mm_cpumask(&init_mm) and is_lazy == false.
++       *
++       *  - Lazily using a real mm.  loaded_mm != &init_mm, our bit
++       *    is set in mm_cpumask(loaded_mm), but is_lazy == true.
++       *    We're heuristically guessing that the CR3 load we
++       *    skipped more than makes up for the overhead added by
++       *    lazy mode.
++       */
++      bool is_lazy;
++
+       /*
+        * Access to this CR4 shadow and to H/W CR4 is protected by
+        * disabling interrupts when modifying either one.
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 440400316c8a..b27aceaf7ed1 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -30,6 +30,8 @@
+ 
+ atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
+ 
++DEFINE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
++
+ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
+                           u16 *new_asid, bool *need_flush)
+ {
+@@ -80,7 +82,7 @@ void leave_mm(int cpu)
+               return;
+ 
+       /* Warn if we're not lazy. */
+-      WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));
++      WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));
+ 
+       switch_mm(NULL, &init_mm, NULL);
+ }
+@@ -140,52 +142,24 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+               __flush_tlb_all();
+       }
+ #endif
++      this_cpu_write(cpu_tlbstate.is_lazy, false);
+ 
+       if (real_prev == next) {
+               VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
+                         next->context.ctx_id);
+ 
+-              if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
+-                      /*
+-                       * There's nothing to do: we weren't lazy, and we
+-                       * aren't changing our mm.  We don't need to flush
+-                       * anything, nor do we need to update CR3, CR4, or
+-                       * LDTR.
+-                       */
+-                      return;
+-              }
+-
+-              /* Resume remote flushes and then read tlb_gen. */
+-              cpumask_set_cpu(cpu, mm_cpumask(next));
+-              next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+-
+-              if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) <
+-                  next_tlb_gen) {
+-                      /*
+-                       * Ideally, we'd have a flush_tlb() variant that
+-                       * takes the known CR3 value as input.  This would
+-                       * be faster on Xen PV and on hypothetical CPUs
+-                       * on which INVPCID is fast.
+-                       */
+-                      this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
+-                                     next_tlb_gen);
+-                      write_cr3(build_cr3(next, prev_asid));
+-
+-                      /*
+-                       * This gets called via leave_mm() in the idle path
+-                       * where RCU functions differently.  Tracing normally
+-                       * uses RCU, so we have to call the tracepoint
+-                       * specially here.
+-                       */
+-                      trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
+-                                              TLB_FLUSH_ALL);
+-              }
+-
+               /*
+-               * We just exited lazy mode, which means that CR4 and/or LDTR
+-               * may be stale.  (Changes to the required CR4 and LDTR states
+-               * are not reflected in tlb_gen.)
++               * We don't currently support having a real mm loaded without
++               * our cpu set in mm_cpumask().  We have all the bookkeeping
++               * in place to figure out whether we would need to flush
++               * if our cpu were cleared in mm_cpumask(), but we don't
++               * currently use it.
+                */
++              if (WARN_ON_ONCE(real_prev != &init_mm &&
++                               !cpumask_test_cpu(cpu, mm_cpumask(next))))
++                      cpumask_set_cpu(cpu, mm_cpumask(next));
++
++              return;
+       } else {
+               u16 new_asid;
+               bool need_flush;
+@@ -204,10 +178,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+               }
+ 
+               /* Stop remote flushes for the previous mm */
+-              if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
+-                      cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+-
+-              VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
++              VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
++                              real_prev != &init_mm);
++              cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+ 
+               /*
+                * Start remote flushes and then read tlb_gen.
+@@ -237,6 +210,37 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+       switch_ldt(real_prev, next);
+ }
+ 
++/*
++ * enter_lazy_tlb() is a hint from the scheduler that we are entering a
++ * kernel thread or other context without an mm.  Acceptable implementations
++ * include doing nothing whatsoever, switching to init_mm, or various clever
++ * lazy tricks to try to minimize TLB flushes.
++ *
++ * The scheduler reserves the right to call enter_lazy_tlb() several times
++ * in a row.  It will notify us that we're going back to a real mm by
++ * calling switch_mm_irqs_off().
++ */
++void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
++{
++      if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
++              return;
++
++      if (static_branch_unlikely(&tlb_use_lazy_mode)) {
++              /*
++               * There's a significant optimization that may be possible
++               * here.  We have accurate enough TLB flush tracking that we
++               * don't need to maintain coherence of TLB per se when we're
++               * lazy.  We do, however, need to maintain coherence of
++               * paging-structure caches.  We could, in principle, leave our
++               * old mm loaded and only switch to init_mm when
++               * tlb_remove_page() happens.
++               */
++              this_cpu_write(cpu_tlbstate.is_lazy, true);
++      } else {
++              switch_mm(NULL, &init_mm, NULL);
++      }
++}
++
+ /*
+  * Call this when reinitializing a CPU.  It fixes the following potential
+  * problems:
+@@ -308,16 +312,20 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
+       /* This code cannot presently handle being reentered. */
+       VM_WARN_ON(!irqs_disabled());
+ 
++      if (unlikely(loaded_mm == &init_mm))
++              return;
++
+       VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
+                  loaded_mm->context.ctx_id);
+ 
+-      if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
++      if (this_cpu_read(cpu_tlbstate.is_lazy)) {
+               /*
+-               * We're in lazy mode -- don't flush.  We can get here on
+-               * remote flushes due to races and on local flushes if a
+-               * kernel thread coincidentally flushes the mm it's lazily
+-               * still using.
++               * We're in lazy mode.  We need to at least flush our
++               * paging-structure cache to avoid speculatively reading
++               * garbage into our TLB.  Since switching to init_mm is barely
++               * slower than a minimal flush, just switch to init_mm.
+                */
++              switch_mm_irqs_off(NULL, &init_mm, NULL);
+               return;
+       }
+ 
+@@ -616,3 +624,57 @@ static int __init create_tlb_single_page_flush_ceiling(void)
+       return 0;
+ }
+ late_initcall(create_tlb_single_page_flush_ceiling);
++
++static ssize_t tlblazy_read_file(struct file *file, char __user *user_buf,
++                               size_t count, loff_t *ppos)
++{
++      char buf[2];
++
++      buf[0] = static_branch_likely(&tlb_use_lazy_mode) ? '1' : '0';
++      buf[1] = '\n';
++
++      return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
++}
++
++static ssize_t tlblazy_write_file(struct file *file,
++               const char __user *user_buf, size_t count, loff_t *ppos)
++{
++      bool val;
++
++      if (kstrtobool_from_user(user_buf, count, &val))
++              return -EINVAL;
++
++      if (val)
++              static_branch_enable(&tlb_use_lazy_mode);
++      else
++              static_branch_disable(&tlb_use_lazy_mode);
++
++      return count;
++}
++
++static const struct file_operations fops_tlblazy = {
++      .read = tlblazy_read_file,
++      .write = tlblazy_write_file,
++      .llseek = default_llseek,
++};
++
++static int __init init_tlb_use_lazy_mode(void)
++{
++      if (boot_cpu_has(X86_FEATURE_PCID)) {
++              /*
++               * Heuristic: with PCID on, switching to and from
++               * init_mm is reasonably fast, but remote flush IPIs
++               * as expensive as ever, so turn off lazy TLB mode.
++               *
++               * We can't do this in setup_pcid() because static keys
++               * haven't been initialized yet, and it would blow up
++               * badly.
++               */
++              static_branch_disable(&tlb_use_lazy_mode);
++      }
++
++      debugfs_create_file("tlb_use_lazy_mode", S_IRUSR | S_IWUSR,
++                          arch_debugfs_dir, NULL, &fops_tlblazy);
++      return 0;
++}
++late_initcall(init_tlb_use_lazy_mode);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0049-Revert-x86-mm-Stop-calling-leave_mm-in-idle-code.patch b/patches/kernel/0049-Revert-x86-mm-Stop-calling-leave_mm-in-idle-code.patch

new file mode 100644 (file)

index 0000000..ffd56ee
--- /dev/null
+++ b/patches/kernel/0049-Revert-x86-mm-Stop-calling-leave_mm-in-idle-code.patch
@@ -0,0 +1,101 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sat, 4 Nov 2017 04:16:12 -0700
+Subject: [PATCH] Revert "x86/mm: Stop calling leave_mm() in idle code"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This reverts commit 43858b4f25cf0adc5c2ca9cf5ce5fdf2532941e5.
+
+The reason I removed the leave_mm() calls in question is because the
+heuristic wasn't needed after that patch.  With the original version
+of my PCID series, we never flushed a "lazy cpu" (i.e. a CPU running a
+kernel thread) due to a flush on the loaded mm.
+
+Unfortunately, that caused architectural issues, so now I've
+reinstated these flushes on non-PCID systems in:
+
+    commit b956575bed91 ("x86/mm: Flush more aggressively in lazy TLB mode").
+
+That, in turn, gives us a power management and occasionally
+performance regression as compared to old kernels: a process that
+goes into a deep idle state on a given CPU and gets its mm flushed
+due to activity on a different CPU will wake the idle CPU.
+
+Reinstate the old ugly heuristic: a CPU that goes into ACPI C3 or an
+intel_idle state that is likely to cause a TLB flush gets its mm
+switched to init_mm before going idle.
+
+FWIW, this heuristic is lousy.  Whether we should change CR3 before
+idle isn't a good hint except insofar as the performance hit is a bit
+lower if the TLB is getting flushed by the idle code anyway.  What we
+really want to know is whether we anticipate being idle long enough
+that the mm is likely to be flushed before we wake up.  This is more a
+matter of the expected latency than the idle state that gets chosen.
+This heuristic also completely fails on systems that don't know
+whether the TLB will be flushed (e.g. AMD systems?).  OTOH it may be a
+bit obsolete anyway -- PCID systems don't presently benefit from this
+heuristic at all.
+
+We also shouldn't do this callback from the innermost bit of the idle code
+due to the RCU nastiness it causes.  All the information needed is
+available before rcu_idle_enter() needs to happen.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Fixes: 43858b4f25cf "x86/mm: Stop calling leave_mm() in idle code"
+Link: http://lkml.kernel.org/r/c513bbd4e653747213e05bc7062de000bf0202a5.1509793738.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 675357362aeba19688440eb1aaa7991067f73b12)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit b607843145fd0593fcd87e2596d1dc5a1d5f79a5)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/tlb.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index b27aceaf7ed1..ed06f1593390 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -194,12 +194,22 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
+                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
+                       write_cr3(build_cr3(next, new_asid));
+-                      trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
+-                                      TLB_FLUSH_ALL);
++
++                      /*
++                       * NB: This gets called via leave_mm() in the idle path
++                       * where RCU functions differently.  Tracing normally
++                       * uses RCU, so we need to use the _rcuidle variant.
++                       *
++                       * (There is no good reason for this.  The idle code should
++                       *  be rearranged to call this before rcu_idle_enter().)
++                       */
++                      trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+               } else {
+                       /* The new ASID is already up to date. */
+                       write_cr3(build_cr3_noflush(next, new_asid));
+-                      trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
++
++                      /* See above wrt _rcuidle. */
++                      trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
+               }
+ 
+               this_cpu_write(cpu_tlbstate.loaded_mm, next);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0049-kprobes-x86-Set-up-frame-pointer-in-kprobe-trampolin.patch b/patches/kernel/0049-kprobes-x86-Set-up-frame-pointer-in-kprobe-trampolin.patch

deleted file mode 100644 (file)

index 4d673b0..0000000
--- a/patches/kernel/0049-kprobes-x86-Set-up-frame-pointer-in-kprobe-trampolin.patch
+++ /dev/null
@@ -1,85 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Tue, 3 Oct 2017 08:51:43 -0500
-Subject: [PATCH] kprobes/x86: Set up frame pointer in kprobe trampoline
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Richard Weinberger saw an unwinder warning when running bcc's opensnoop:
-
-  WARNING: kernel stack frame pointer at ffff99ef4076bea0 in opensnoop:2008 has bad value 0000000000000008
-  unwind stack type:0 next_sp:          (null) mask:0x2 graph_idx:0
-  ...
-  ffff99ef4076be88: ffff99ef4076bea0 (0xffff99ef4076bea0)
-  ffff99ef4076be90: ffffffffac442721 (optimized_callback +0x81/0x90)
-  ...
-
-A lockdep stack trace was initiated from inside a kprobe handler, when
-the unwinder noticed a bad frame pointer on the stack.  The bad frame
-pointer is related to the fact that the kprobe optprobe trampoline
-doesn't save the frame pointer before calling into optimized_callback().
-
-Reported-and-tested-by: Richard Weinberger <richard@sigma-star.at>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
-Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
-Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
-Cc: David S . Miller <davem@davemloft.net>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/7aef2f8ecd75c2f505ef9b80490412262cf4a44c.1507038547.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit ee213fc72fd67d0988525af501534f4cb924d1e9)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 0f7d5518c91335584b16c7bed1c54c10b78ea76a)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/kprobes/common.h | 13 +++++++++++--
- 1 file changed, 11 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
-index db2182d63ed0..3fc0f9a794cb 100644
---- a/arch/x86/kernel/kprobes/common.h
-+++ b/arch/x86/kernel/kprobes/common.h
-@@ -3,6 +3,15 @@
- 
- /* Kprobes and Optprobes common header */
- 
-+#include <asm/asm.h>
-+
-+#ifdef CONFIG_FRAME_POINTER
-+# define SAVE_RBP_STRING "    push %" _ASM_BP "\n" \
-+                       "      mov  %" _ASM_SP ", %" _ASM_BP "\n"
-+#else
-+# define SAVE_RBP_STRING "    push %" _ASM_BP "\n"
-+#endif
-+
- #ifdef CONFIG_X86_64
- #define SAVE_REGS_STRING                      \
-       /* Skip cs, ip, orig_ax. */             \
-@@ -17,7 +26,7 @@
-       "       pushq %r10\n"                   \
-       "       pushq %r11\n"                   \
-       "       pushq %rbx\n"                   \
--      "       pushq %rbp\n"                   \
-+      SAVE_RBP_STRING                         \
-       "       pushq %r12\n"                   \
-       "       pushq %r13\n"                   \
-       "       pushq %r14\n"                   \
-@@ -48,7 +57,7 @@
-       "       pushl %es\n"                    \
-       "       pushl %ds\n"                    \
-       "       pushl %eax\n"                   \
--      "       pushl %ebp\n"                   \
-+      SAVE_RBP_STRING                         \
-       "       pushl %edi\n"                   \
-       "       pushl %esi\n"                   \
-       "       pushl %edx\n"                   \
--- 
-2.14.2
-
diff --git a/patches/kernel/0050-kprobes-x86-Set-up-frame-pointer-in-kprobe-trampolin.patch b/patches/kernel/0050-kprobes-x86-Set-up-frame-pointer-in-kprobe-trampolin.patch

new file mode 100644 (file)

index 0000000..4d673b0
--- /dev/null
+++ b/patches/kernel/0050-kprobes-x86-Set-up-frame-pointer-in-kprobe-trampolin.patch
@@ -0,0 +1,85 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Tue, 3 Oct 2017 08:51:43 -0500
+Subject: [PATCH] kprobes/x86: Set up frame pointer in kprobe trampoline
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Richard Weinberger saw an unwinder warning when running bcc's opensnoop:
+
+  WARNING: kernel stack frame pointer at ffff99ef4076bea0 in opensnoop:2008 has bad value 0000000000000008
+  unwind stack type:0 next_sp:          (null) mask:0x2 graph_idx:0
+  ...
+  ffff99ef4076be88: ffff99ef4076bea0 (0xffff99ef4076bea0)
+  ffff99ef4076be90: ffffffffac442721 (optimized_callback +0x81/0x90)
+  ...
+
+A lockdep stack trace was initiated from inside a kprobe handler, when
+the unwinder noticed a bad frame pointer on the stack.  The bad frame
+pointer is related to the fact that the kprobe optprobe trampoline
+doesn't save the frame pointer before calling into optimized_callback().
+
+Reported-and-tested-by: Richard Weinberger <richard@sigma-star.at>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
+Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+Cc: David S . Miller <davem@davemloft.net>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/7aef2f8ecd75c2f505ef9b80490412262cf4a44c.1507038547.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit ee213fc72fd67d0988525af501534f4cb924d1e9)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 0f7d5518c91335584b16c7bed1c54c10b78ea76a)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/kprobes/common.h | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
+index db2182d63ed0..3fc0f9a794cb 100644
+--- a/arch/x86/kernel/kprobes/common.h
++++ b/arch/x86/kernel/kprobes/common.h
+@@ -3,6 +3,15 @@
+ 
+ /* Kprobes and Optprobes common header */
+ 
++#include <asm/asm.h>
++
++#ifdef CONFIG_FRAME_POINTER
++# define SAVE_RBP_STRING "    push %" _ASM_BP "\n" \
++                       "      mov  %" _ASM_SP ", %" _ASM_BP "\n"
++#else
++# define SAVE_RBP_STRING "    push %" _ASM_BP "\n"
++#endif
++
+ #ifdef CONFIG_X86_64
+ #define SAVE_REGS_STRING                      \
+       /* Skip cs, ip, orig_ax. */             \
+@@ -17,7 +26,7 @@
+       "       pushq %r10\n"                   \
+       "       pushq %r11\n"                   \
+       "       pushq %rbx\n"                   \
+-      "       pushq %rbp\n"                   \
++      SAVE_RBP_STRING                         \
+       "       pushq %r12\n"                   \
+       "       pushq %r13\n"                   \
+       "       pushq %r14\n"                   \
+@@ -48,7 +57,7 @@
+       "       pushl %es\n"                    \
+       "       pushl %ds\n"                    \
+       "       pushl %eax\n"                   \
+-      "       pushl %ebp\n"                   \
++      SAVE_RBP_STRING                         \
+       "       pushl %edi\n"                   \
+       "       pushl %esi\n"                   \
+       "       pushl %edx\n"                   \
+-- 
+2.14.2
+
diff --git a/patches/kernel/0050-x86-tracing-Introduce-a-static-key-for-exception-tra.patch b/patches/kernel/0050-x86-tracing-Introduce-a-static-key-for-exception-tra.patch

deleted file mode 100644 (file)

index efbc800..0000000
--- a/patches/kernel/0050-x86-tracing-Introduce-a-static-key-for-exception-tra.patch
+++ /dev/null
@@ -1,139 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon, 28 Aug 2017 08:47:21 +0200
-Subject: [PATCH] x86/tracing: Introduce a static key for exception tracing
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Switching the IDT just for avoiding tracepoints creates a completely
-impenetrable macro/inline/ifdef mess.
-
-There is no point in avoiding tracepoints for most of the traps/exceptions.
-For the more expensive tracepoints, like pagefaults, this can be handled with
-an explicit static key.
-
-Preparatory patch to remove the tracing IDT.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Steven Rostedt <rostedt@goodmis.org>
-Link: http://lkml.kernel.org/r/20170828064956.593094539@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 2feb1b316d48004d905278c02a55902cab0be8be)
-Signed-off-by: Andy Whitcroft <apw@kathleen.maas>
-(cherry picked from commit 15e0ff2a63fdd93f8881e2ebba5c048c5b601e57)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d58a56e851c339d8d9d311dc9b4fad6abbf8bf19)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/trace/common.h      | 15 +++++++++++++++
- arch/x86/include/asm/trace/exceptions.h  |  4 +---
- arch/x86/include/asm/trace/irq_vectors.h |  4 +---
- arch/x86/kernel/tracepoint.c             |  9 ++++++++-
- 4 files changed, 25 insertions(+), 7 deletions(-)
- create mode 100644 arch/x86/include/asm/trace/common.h
-
-diff --git a/arch/x86/include/asm/trace/common.h b/arch/x86/include/asm/trace/common.h
-new file mode 100644
-index 000000000000..b1eb7b18ee8a
---- /dev/null
-+++ b/arch/x86/include/asm/trace/common.h
-@@ -0,0 +1,15 @@
-+#ifndef _ASM_TRACE_COMMON_H
-+#define _ASM_TRACE_COMMON_H
-+
-+extern int trace_irq_vector_regfunc(void);
-+extern void trace_irq_vector_unregfunc(void);
-+
-+#ifdef CONFIG_TRACING
-+DECLARE_STATIC_KEY_FALSE(trace_irqvectors_key);
-+#define trace_irqvectors_enabled()                    \
-+      static_branch_unlikely(&trace_irqvectors_key)
-+#else
-+static inline bool trace_irqvectors_enabled(void) { return false; }
-+#endif
-+
-+#endif
-diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
-index 2422b14c50a7..960a5b50ac3b 100644
---- a/arch/x86/include/asm/trace/exceptions.h
-+++ b/arch/x86/include/asm/trace/exceptions.h
-@@ -5,9 +5,7 @@
- #define _TRACE_PAGE_FAULT_H
- 
- #include <linux/tracepoint.h>
--
--extern int trace_irq_vector_regfunc(void);
--extern void trace_irq_vector_unregfunc(void);
-+#include <asm/trace/common.h>
- 
- DECLARE_EVENT_CLASS(x86_exceptions,
- 
-diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
-index 32dd6a9e343c..7825b4426e7e 100644
---- a/arch/x86/include/asm/trace/irq_vectors.h
-+++ b/arch/x86/include/asm/trace/irq_vectors.h
-@@ -5,9 +5,7 @@
- #define _TRACE_IRQ_VECTORS_H
- 
- #include <linux/tracepoint.h>
--
--extern int trace_irq_vector_regfunc(void);
--extern void trace_irq_vector_unregfunc(void);
-+#include <asm/trace/common.h>
- 
- DECLARE_EVENT_CLASS(x86_irq_vector,
- 
-diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
-index 15515132bf0d..dd4aa04bb95c 100644
---- a/arch/x86/kernel/tracepoint.c
-+++ b/arch/x86/kernel/tracepoint.c
-@@ -4,9 +4,11 @@
-  * Copyright (C) 2013 Seiji Aguchi <seiji.aguchi@hds.com>
-  *
-  */
-+#include <linux/jump_label.h>
-+#include <linux/atomic.h>
-+
- #include <asm/hw_irq.h>
- #include <asm/desc.h>
--#include <linux/atomic.h>
- 
- atomic_t trace_idt_ctr = ATOMIC_INIT(0);
- struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
-@@ -15,6 +17,7 @@ struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
- /* No need to be aligned, but done to keep all IDTs defined the same way. */
- gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;
- 
-+DEFINE_STATIC_KEY_FALSE(trace_irqvectors_key);
- static int trace_irq_vector_refcount;
- static DEFINE_MUTEX(irq_vector_mutex);
- 
-@@ -36,6 +39,8 @@ static void switch_idt(void *arg)
- 
- int trace_irq_vector_regfunc(void)
- {
-+      static_branch_inc(&trace_irqvectors_key);
-+
-       mutex_lock(&irq_vector_mutex);
-       if (!trace_irq_vector_refcount) {
-               set_trace_idt_ctr(1);
-@@ -49,6 +54,8 @@ int trace_irq_vector_regfunc(void)
- 
- void trace_irq_vector_unregfunc(void)
- {
-+      static_branch_dec(&trace_irqvectors_key);
-+
-       mutex_lock(&irq_vector_mutex);
-       trace_irq_vector_refcount--;
-       if (!trace_irq_vector_refcount) {
--- 
-2.14.2
-
diff --git a/patches/kernel/0051-x86-boot-Add-early-cmdline-parsing-for-options-with-.patch b/patches/kernel/0051-x86-boot-Add-early-cmdline-parsing-for-options-with-.patch

deleted file mode 100644 (file)

index 0872493..0000000
--- a/patches/kernel/0051-x86-boot-Add-early-cmdline-parsing-for-options-with-.patch
+++ /dev/null
@@ -1,189 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tom Lendacky <thomas.lendacky@amd.com>
-Date: Mon, 17 Jul 2017 16:10:33 -0500
-Subject: [PATCH] x86/boot: Add early cmdline parsing for options with
- arguments
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Add a cmdline_find_option() function to look for cmdline options that
-take arguments. The argument is returned in a supplied buffer and the
-argument length (regardless of whether it fits in the supplied buffer)
-is returned, with -1 indicating not found.
-
-Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Alexander Potapenko <glider@google.com>
-Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Arnd Bergmann <arnd@arndb.de>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brijesh Singh <brijesh.singh@amd.com>
-Cc: Dave Young <dyoung@redhat.com>
-Cc: Dmitry Vyukov <dvyukov@google.com>
-Cc: Jonathan Corbet <corbet@lwn.net>
-Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-Cc: Larry Woodman <lwoodman@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Matt Fleming <matt@codeblueprint.co.uk>
-Cc: Michael S. Tsirkin <mst@redhat.com>
-Cc: Paolo Bonzini <pbonzini@redhat.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Radim Krčmář <rkrcmar@redhat.com>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Toshimitsu Kani <toshi.kani@hpe.com>
-Cc: kasan-dev@googlegroups.com
-Cc: kvm@vger.kernel.org
-Cc: linux-arch@vger.kernel.org
-Cc: linux-doc@vger.kernel.org
-Cc: linux-efi@vger.kernel.org
-Cc: linux-mm@kvack.org
-Link: http://lkml.kernel.org/r/36b5f97492a9745dce27682305f990fc20e5cf8a.1500319216.git.thomas.lendacky@amd.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit e505371dd83963caae1a37ead9524e8d997341be)
-Signed-off-by: Andy Whitcroft <apw@kathleen.maas>
-(cherry picked from commit 37569cd003aa69a57d5666530436c2d973a57b26)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit b9f03418aa9b8ecbb1c7f32ac2bfe68fd21de4f5)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/cmdline.h |   2 +
- arch/x86/lib/cmdline.c         | 105 +++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 107 insertions(+)
-
-diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h
-index e01f7f7ccb0c..84ae170bc3d0 100644
---- a/arch/x86/include/asm/cmdline.h
-+++ b/arch/x86/include/asm/cmdline.h
-@@ -2,5 +2,7 @@
- #define _ASM_X86_CMDLINE_H
- 
- int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
-+int cmdline_find_option(const char *cmdline_ptr, const char *option,
-+                      char *buffer, int bufsize);
- 
- #endif /* _ASM_X86_CMDLINE_H */
-diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
-index 5cc78bf57232..3261abb21ef4 100644
---- a/arch/x86/lib/cmdline.c
-+++ b/arch/x86/lib/cmdline.c
-@@ -104,7 +104,112 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size,
-       return 0;       /* Buffer overrun */
- }
- 
-+/*
-+ * Find a non-boolean option (i.e. option=argument). In accordance with
-+ * standard Linux practice, if this option is repeated, this returns the
-+ * last instance on the command line.
-+ *
-+ * @cmdline: the cmdline string
-+ * @max_cmdline_size: the maximum size of cmdline
-+ * @option: option string to look for
-+ * @buffer: memory buffer to return the option argument
-+ * @bufsize: size of the supplied memory buffer
-+ *
-+ * Returns the length of the argument (regardless of if it was
-+ * truncated to fit in the buffer), or -1 on not found.
-+ */
-+static int
-+__cmdline_find_option(const char *cmdline, int max_cmdline_size,
-+                    const char *option, char *buffer, int bufsize)
-+{
-+      char c;
-+      int pos = 0, len = -1;
-+      const char *opptr = NULL;
-+      char *bufptr = buffer;
-+      enum {
-+              st_wordstart = 0,       /* Start of word/after whitespace */
-+              st_wordcmp,     /* Comparing this word */
-+              st_wordskip,    /* Miscompare, skip */
-+              st_bufcpy,      /* Copying this to buffer */
-+      } state = st_wordstart;
-+
-+      if (!cmdline)
-+              return -1;      /* No command line */
-+
-+      /*
-+       * This 'pos' check ensures we do not overrun
-+       * a non-NULL-terminated 'cmdline'
-+       */
-+      while (pos++ < max_cmdline_size) {
-+              c = *(char *)cmdline++;
-+              if (!c)
-+                      break;
-+
-+              switch (state) {
-+              case st_wordstart:
-+                      if (myisspace(c))
-+                              break;
-+
-+                      state = st_wordcmp;
-+                      opptr = option;
-+                      /* fall through */
-+
-+              case st_wordcmp:
-+                      if ((c == '=') && !*opptr) {
-+                              /*
-+                               * We matched all the way to the end of the
-+                               * option we were looking for, prepare to
-+                               * copy the argument.
-+                               */
-+                              len = 0;
-+                              bufptr = buffer;
-+                              state = st_bufcpy;
-+                              break;
-+                      } else if (c == *opptr++) {
-+                              /*
-+                               * We are currently matching, so continue
-+                               * to the next character on the cmdline.
-+                               */
-+                              break;
-+                      }
-+                      state = st_wordskip;
-+                      /* fall through */
-+
-+              case st_wordskip:
-+                      if (myisspace(c))
-+                              state = st_wordstart;
-+                      break;
-+
-+              case st_bufcpy:
-+                      if (myisspace(c)) {
-+                              state = st_wordstart;
-+                      } else {
-+                              /*
-+                               * Increment len, but don't overrun the
-+                               * supplied buffer and leave room for the
-+                               * NULL terminator.
-+                               */
-+                              if (++len < bufsize)
-+                                      *bufptr++ = c;
-+                      }
-+                      break;
-+              }
-+      }
-+
-+      if (bufsize)
-+              *bufptr = '\0';
-+
-+      return len;
-+}
-+
- int cmdline_find_option_bool(const char *cmdline, const char *option)
- {
-       return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
- }
-+
-+int cmdline_find_option(const char *cmdline, const char *option, char *buffer,
-+                      int bufsize)
-+{
-+      return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option,
-+                                   buffer, bufsize);
-+}
--- 
-2.14.2
-
diff --git a/patches/kernel/0051-x86-tracing-Introduce-a-static-key-for-exception-tra.patch b/patches/kernel/0051-x86-tracing-Introduce-a-static-key-for-exception-tra.patch

new file mode 100644 (file)

index 0000000..efbc800
--- /dev/null
+++ b/patches/kernel/0051-x86-tracing-Introduce-a-static-key-for-exception-tra.patch
@@ -0,0 +1,139 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 28 Aug 2017 08:47:21 +0200
+Subject: [PATCH] x86/tracing: Introduce a static key for exception tracing
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Switching the IDT just for avoiding tracepoints creates a completely
+impenetrable macro/inline/ifdef mess.
+
+There is no point in avoiding tracepoints for most of the traps/exceptions.
+For the more expensive tracepoints, like pagefaults, this can be handled with
+an explicit static key.
+
+Preparatory patch to remove the tracing IDT.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Link: http://lkml.kernel.org/r/20170828064956.593094539@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 2feb1b316d48004d905278c02a55902cab0be8be)
+Signed-off-by: Andy Whitcroft <apw@kathleen.maas>
+(cherry picked from commit 15e0ff2a63fdd93f8881e2ebba5c048c5b601e57)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d58a56e851c339d8d9d311dc9b4fad6abbf8bf19)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/trace/common.h      | 15 +++++++++++++++
+ arch/x86/include/asm/trace/exceptions.h  |  4 +---
+ arch/x86/include/asm/trace/irq_vectors.h |  4 +---
+ arch/x86/kernel/tracepoint.c             |  9 ++++++++-
+ 4 files changed, 25 insertions(+), 7 deletions(-)
+ create mode 100644 arch/x86/include/asm/trace/common.h
+
+diff --git a/arch/x86/include/asm/trace/common.h b/arch/x86/include/asm/trace/common.h
+new file mode 100644
+index 000000000000..b1eb7b18ee8a
+--- /dev/null
++++ b/arch/x86/include/asm/trace/common.h
+@@ -0,0 +1,15 @@
++#ifndef _ASM_TRACE_COMMON_H
++#define _ASM_TRACE_COMMON_H
++
++extern int trace_irq_vector_regfunc(void);
++extern void trace_irq_vector_unregfunc(void);
++
++#ifdef CONFIG_TRACING
++DECLARE_STATIC_KEY_FALSE(trace_irqvectors_key);
++#define trace_irqvectors_enabled()                    \
++      static_branch_unlikely(&trace_irqvectors_key)
++#else
++static inline bool trace_irqvectors_enabled(void) { return false; }
++#endif
++
++#endif
+diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
+index 2422b14c50a7..960a5b50ac3b 100644
+--- a/arch/x86/include/asm/trace/exceptions.h
++++ b/arch/x86/include/asm/trace/exceptions.h
+@@ -5,9 +5,7 @@
+ #define _TRACE_PAGE_FAULT_H
+ 
+ #include <linux/tracepoint.h>
+-
+-extern int trace_irq_vector_regfunc(void);
+-extern void trace_irq_vector_unregfunc(void);
++#include <asm/trace/common.h>
+ 
+ DECLARE_EVENT_CLASS(x86_exceptions,
+ 
+diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
+index 32dd6a9e343c..7825b4426e7e 100644
+--- a/arch/x86/include/asm/trace/irq_vectors.h
++++ b/arch/x86/include/asm/trace/irq_vectors.h
+@@ -5,9 +5,7 @@
+ #define _TRACE_IRQ_VECTORS_H
+ 
+ #include <linux/tracepoint.h>
+-
+-extern int trace_irq_vector_regfunc(void);
+-extern void trace_irq_vector_unregfunc(void);
++#include <asm/trace/common.h>
+ 
+ DECLARE_EVENT_CLASS(x86_irq_vector,
+ 
+diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
+index 15515132bf0d..dd4aa04bb95c 100644
+--- a/arch/x86/kernel/tracepoint.c
++++ b/arch/x86/kernel/tracepoint.c
+@@ -4,9 +4,11 @@
+  * Copyright (C) 2013 Seiji Aguchi <seiji.aguchi@hds.com>
+  *
+  */
++#include <linux/jump_label.h>
++#include <linux/atomic.h>
++
+ #include <asm/hw_irq.h>
+ #include <asm/desc.h>
+-#include <linux/atomic.h>
+ 
+ atomic_t trace_idt_ctr = ATOMIC_INIT(0);
+ struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
+@@ -15,6 +17,7 @@ struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
+ /* No need to be aligned, but done to keep all IDTs defined the same way. */
+ gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;
+ 
++DEFINE_STATIC_KEY_FALSE(trace_irqvectors_key);
+ static int trace_irq_vector_refcount;
+ static DEFINE_MUTEX(irq_vector_mutex);
+ 
+@@ -36,6 +39,8 @@ static void switch_idt(void *arg)
+ 
+ int trace_irq_vector_regfunc(void)
+ {
++      static_branch_inc(&trace_irqvectors_key);
++
+       mutex_lock(&irq_vector_mutex);
+       if (!trace_irq_vector_refcount) {
+               set_trace_idt_ctr(1);
+@@ -49,6 +54,8 @@ int trace_irq_vector_regfunc(void)
+ 
+ void trace_irq_vector_unregfunc(void)
+ {
++      static_branch_dec(&trace_irqvectors_key);
++
+       mutex_lock(&irq_vector_mutex);
+       trace_irq_vector_refcount--;
+       if (!trace_irq_vector_refcount) {
+-- 
+2.14.2
+
diff --git a/patches/kernel/0052-mm-x86-mm-Fix-performance-regression-in-get_user_pag.patch b/patches/kernel/0052-mm-x86-mm-Fix-performance-regression-in-get_user_pag.patch

deleted file mode 100644 (file)

index bc60e7a..0000000
--- a/patches/kernel/0052-mm-x86-mm-Fix-performance-regression-in-get_user_pag.patch
+++ /dev/null
@@ -1,192 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
-Date: Sat, 9 Sep 2017 00:56:03 +0300
-Subject: [PATCH] mm, x86/mm: Fix performance regression in
- get_user_pages_fast()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The 0-day test bot found a performance regression that was tracked down to
-switching x86 to the generic get_user_pages_fast() implementation:
-
-  http://lkml.kernel.org/r/20170710024020.GA26389@yexl-desktop
-
-The regression was caused by the fact that we now use local_irq_save() +
-local_irq_restore() in get_user_pages_fast() to disable interrupts.
-In x86 implementation local_irq_disable() + local_irq_enable() was used.
-
-The fix is to make get_user_pages_fast() use local_irq_disable(),
-leaving local_irq_save() for __get_user_pages_fast() that can be called
-with interrupts disabled.
-
-Numbers for pinning a gigabyte of memory, one page a time, 20 repeats:
-
-  Before:  Average: 14.91 ms, stddev: 0.45 ms
-  After:   Average: 10.76 ms, stddev: 0.18 ms
-
-Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Huang Ying <ying.huang@intel.com>
-Cc: Jonathan Corbet <corbet@lwn.net>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: Thorsten Leemhuis <regressions@leemhuis.info>
-Cc: linux-mm@kvack.org
-Fixes: e585513b76f7 ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation")
-Link: http://lkml.kernel.org/r/20170908215603.9189-3-kirill.shutemov@linux.intel.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 5b65c4677a57a1d4414212f9995aa0e46a21ff80)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 5241f4b2c68284612e34910305f3234e4a64701b)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- mm/gup.c | 97 ++++++++++++++++++++++++++++++++++++++--------------------------
- 1 file changed, 58 insertions(+), 39 deletions(-)
-
-diff --git a/mm/gup.c b/mm/gup.c
-index 23f01c40c88f..4a789f1c6a27 100644
---- a/mm/gup.c
-+++ b/mm/gup.c
-@@ -1618,6 +1618,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
-       return 1;
- }
- 
-+static void gup_pgd_range(unsigned long addr, unsigned long end,
-+              int write, struct page **pages, int *nr)
-+{
-+      unsigned long next;
-+      pgd_t *pgdp;
-+
-+      pgdp = pgd_offset(current->mm, addr);
-+      do {
-+              pgd_t pgd = READ_ONCE(*pgdp);
-+
-+              next = pgd_addr_end(addr, end);
-+              if (pgd_none(pgd))
-+                      return;
-+              if (unlikely(pgd_huge(pgd))) {
-+                      if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
-+                                        pages, nr))
-+                              return;
-+              } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
-+                      if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
-+                                       PGDIR_SHIFT, next, write, pages, nr))
-+                              return;
-+              } else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
-+                      return;
-+      } while (pgdp++, addr = next, addr != end);
-+}
-+
-+#ifndef gup_fast_permitted
-+/*
-+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
-+ * we need to fall back to the slow version:
-+ */
-+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
-+{
-+      unsigned long len, end;
-+
-+      len = (unsigned long) nr_pages << PAGE_SHIFT;
-+      end = start + len;
-+      return end >= start;
-+}
-+#endif
-+
- /*
-  * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
-  * the regular GUP. It will only return non-negative values.
-@@ -1625,10 +1666,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
- int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
-                         struct page **pages)
- {
--      struct mm_struct *mm = current->mm;
-       unsigned long addr, len, end;
--      unsigned long next, flags;
--      pgd_t *pgdp;
-+      unsigned long flags;
-       int nr = 0;
- 
-       start &= PAGE_MASK;
-@@ -1652,45 +1691,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
-        * block IPIs that come from THPs splitting.
-        */
- 
--      local_irq_save(flags);
--      pgdp = pgd_offset(mm, addr);
--      do {
--              pgd_t pgd = READ_ONCE(*pgdp);
--
--              next = pgd_addr_end(addr, end);
--              if (pgd_none(pgd))
--                      break;
--              if (unlikely(pgd_huge(pgd))) {
--                      if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
--                                        pages, &nr))
--                              break;
--              } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
--                      if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
--                                       PGDIR_SHIFT, next, write, pages, &nr))
--                              break;
--              } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
--                      break;
--      } while (pgdp++, addr = next, addr != end);
--      local_irq_restore(flags);
-+      if (gup_fast_permitted(start, nr_pages, write)) {
-+              local_irq_save(flags);
-+              gup_pgd_range(addr, end, write, pages, &nr);
-+              local_irq_restore(flags);
-+      }
- 
-       return nr;
- }
- 
--#ifndef gup_fast_permitted
--/*
-- * Check if it's allowed to use __get_user_pages_fast() for the range, or
-- * we need to fall back to the slow version:
-- */
--bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
--{
--      unsigned long len, end;
--
--      len = (unsigned long) nr_pages << PAGE_SHIFT;
--      end = start + len;
--      return end >= start;
--}
--#endif
--
- /**
-  * get_user_pages_fast() - pin user pages in memory
-  * @start:    starting user address
-@@ -1710,12 +1719,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
- int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-                       struct page **pages)
- {
-+      unsigned long addr, len, end;
-       int nr = 0, ret = 0;
- 
-       start &= PAGE_MASK;
-+      addr = start;
-+      len = (unsigned long) nr_pages << PAGE_SHIFT;
-+      end = start + len;
-+
-+      if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
-+                                      (void __user *)start, len)))
-+              return 0;
- 
-       if (gup_fast_permitted(start, nr_pages, write)) {
--              nr = __get_user_pages_fast(start, nr_pages, write, pages);
-+              local_irq_disable();
-+              gup_pgd_range(addr, end, write, pages, &nr);
-+              local_irq_enable();
-               ret = nr;
-       }
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0052-x86-boot-Add-early-cmdline-parsing-for-options-with-.patch b/patches/kernel/0052-x86-boot-Add-early-cmdline-parsing-for-options-with-.patch

new file mode 100644 (file)

index 0000000..0872493
--- /dev/null
+++ b/patches/kernel/0052-x86-boot-Add-early-cmdline-parsing-for-options-with-.patch
@@ -0,0 +1,189 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 17 Jul 2017 16:10:33 -0500
+Subject: [PATCH] x86/boot: Add early cmdline parsing for options with
+ arguments
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Add a cmdline_find_option() function to look for cmdline options that
+take arguments. The argument is returned in a supplied buffer and the
+argument length (regardless of whether it fits in the supplied buffer)
+is returned, with -1 indicating not found.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brijesh Singh <brijesh.singh@amd.com>
+Cc: Dave Young <dyoung@redhat.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Larry Woodman <lwoodman@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matt Fleming <matt@codeblueprint.co.uk>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Toshimitsu Kani <toshi.kani@hpe.com>
+Cc: kasan-dev@googlegroups.com
+Cc: kvm@vger.kernel.org
+Cc: linux-arch@vger.kernel.org
+Cc: linux-doc@vger.kernel.org
+Cc: linux-efi@vger.kernel.org
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/36b5f97492a9745dce27682305f990fc20e5cf8a.1500319216.git.thomas.lendacky@amd.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit e505371dd83963caae1a37ead9524e8d997341be)
+Signed-off-by: Andy Whitcroft <apw@kathleen.maas>
+(cherry picked from commit 37569cd003aa69a57d5666530436c2d973a57b26)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit b9f03418aa9b8ecbb1c7f32ac2bfe68fd21de4f5)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/cmdline.h |   2 +
+ arch/x86/lib/cmdline.c         | 105 +++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 107 insertions(+)
+
+diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h
+index e01f7f7ccb0c..84ae170bc3d0 100644
+--- a/arch/x86/include/asm/cmdline.h
++++ b/arch/x86/include/asm/cmdline.h
+@@ -2,5 +2,7 @@
+ #define _ASM_X86_CMDLINE_H
+ 
+ int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
++int cmdline_find_option(const char *cmdline_ptr, const char *option,
++                      char *buffer, int bufsize);
+ 
+ #endif /* _ASM_X86_CMDLINE_H */
+diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
+index 5cc78bf57232..3261abb21ef4 100644
+--- a/arch/x86/lib/cmdline.c
++++ b/arch/x86/lib/cmdline.c
+@@ -104,7 +104,112 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size,
+       return 0;       /* Buffer overrun */
+ }
+ 
++/*
++ * Find a non-boolean option (i.e. option=argument). In accordance with
++ * standard Linux practice, if this option is repeated, this returns the
++ * last instance on the command line.
++ *
++ * @cmdline: the cmdline string
++ * @max_cmdline_size: the maximum size of cmdline
++ * @option: option string to look for
++ * @buffer: memory buffer to return the option argument
++ * @bufsize: size of the supplied memory buffer
++ *
++ * Returns the length of the argument (regardless of if it was
++ * truncated to fit in the buffer), or -1 on not found.
++ */
++static int
++__cmdline_find_option(const char *cmdline, int max_cmdline_size,
++                    const char *option, char *buffer, int bufsize)
++{
++      char c;
++      int pos = 0, len = -1;
++      const char *opptr = NULL;
++      char *bufptr = buffer;
++      enum {
++              st_wordstart = 0,       /* Start of word/after whitespace */
++              st_wordcmp,     /* Comparing this word */
++              st_wordskip,    /* Miscompare, skip */
++              st_bufcpy,      /* Copying this to buffer */
++      } state = st_wordstart;
++
++      if (!cmdline)
++              return -1;      /* No command line */
++
++      /*
++       * This 'pos' check ensures we do not overrun
++       * a non-NULL-terminated 'cmdline'
++       */
++      while (pos++ < max_cmdline_size) {
++              c = *(char *)cmdline++;
++              if (!c)
++                      break;
++
++              switch (state) {
++              case st_wordstart:
++                      if (myisspace(c))
++                              break;
++
++                      state = st_wordcmp;
++                      opptr = option;
++                      /* fall through */
++
++              case st_wordcmp:
++                      if ((c == '=') && !*opptr) {
++                              /*
++                               * We matched all the way to the end of the
++                               * option we were looking for, prepare to
++                               * copy the argument.
++                               */
++                              len = 0;
++                              bufptr = buffer;
++                              state = st_bufcpy;
++                              break;
++                      } else if (c == *opptr++) {
++                              /*
++                               * We are currently matching, so continue
++                               * to the next character on the cmdline.
++                               */
++                              break;
++                      }
++                      state = st_wordskip;
++                      /* fall through */
++
++              case st_wordskip:
++                      if (myisspace(c))
++                              state = st_wordstart;
++                      break;
++
++              case st_bufcpy:
++                      if (myisspace(c)) {
++                              state = st_wordstart;
++                      } else {
++                              /*
++                               * Increment len, but don't overrun the
++                               * supplied buffer and leave room for the
++                               * NULL terminator.
++                               */
++                              if (++len < bufsize)
++                                      *bufptr++ = c;
++                      }
++                      break;
++              }
++      }
++
++      if (bufsize)
++              *bufptr = '\0';
++
++      return len;
++}
++
+ int cmdline_find_option_bool(const char *cmdline, const char *option)
+ {
+       return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
+ }
++
++int cmdline_find_option(const char *cmdline, const char *option, char *buffer,
++                      int bufsize)
++{
++      return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option,
++                                   buffer, bufsize);
++}
+-- 
+2.14.2
+
diff --git a/patches/kernel/0053-mm-x86-mm-Fix-performance-regression-in-get_user_pag.patch b/patches/kernel/0053-mm-x86-mm-Fix-performance-regression-in-get_user_pag.patch

new file mode 100644 (file)

index 0000000..bc60e7a
--- /dev/null
+++ b/patches/kernel/0053-mm-x86-mm-Fix-performance-regression-in-get_user_pag.patch
@@ -0,0 +1,192 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Sat, 9 Sep 2017 00:56:03 +0300
+Subject: [PATCH] mm, x86/mm: Fix performance regression in
+ get_user_pages_fast()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The 0-day test bot found a performance regression that was tracked down to
+switching x86 to the generic get_user_pages_fast() implementation:
+
+  http://lkml.kernel.org/r/20170710024020.GA26389@yexl-desktop
+
+The regression was caused by the fact that we now use local_irq_save() +
+local_irq_restore() in get_user_pages_fast() to disable interrupts.
+In x86 implementation local_irq_disable() + local_irq_enable() was used.
+
+The fix is to make get_user_pages_fast() use local_irq_disable(),
+leaving local_irq_save() for __get_user_pages_fast() that can be called
+with interrupts disabled.
+
+Numbers for pinning a gigabyte of memory, one page a time, 20 repeats:
+
+  Before:  Average: 14.91 ms, stddev: 0.45 ms
+  After:   Average: 10.76 ms, stddev: 0.18 ms
+
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Huang Ying <ying.huang@intel.com>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Thorsten Leemhuis <regressions@leemhuis.info>
+Cc: linux-mm@kvack.org
+Fixes: e585513b76f7 ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation")
+Link: http://lkml.kernel.org/r/20170908215603.9189-3-kirill.shutemov@linux.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 5b65c4677a57a1d4414212f9995aa0e46a21ff80)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 5241f4b2c68284612e34910305f3234e4a64701b)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ mm/gup.c | 97 ++++++++++++++++++++++++++++++++++++++--------------------------
+ 1 file changed, 58 insertions(+), 39 deletions(-)
+
+diff --git a/mm/gup.c b/mm/gup.c
+index 23f01c40c88f..4a789f1c6a27 100644
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -1618,6 +1618,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
+       return 1;
+ }
+ 
++static void gup_pgd_range(unsigned long addr, unsigned long end,
++              int write, struct page **pages, int *nr)
++{
++      unsigned long next;
++      pgd_t *pgdp;
++
++      pgdp = pgd_offset(current->mm, addr);
++      do {
++              pgd_t pgd = READ_ONCE(*pgdp);
++
++              next = pgd_addr_end(addr, end);
++              if (pgd_none(pgd))
++                      return;
++              if (unlikely(pgd_huge(pgd))) {
++                      if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
++                                        pages, nr))
++                              return;
++              } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
++                      if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
++                                       PGDIR_SHIFT, next, write, pages, nr))
++                              return;
++              } else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
++                      return;
++      } while (pgdp++, addr = next, addr != end);
++}
++
++#ifndef gup_fast_permitted
++/*
++ * Check if it's allowed to use __get_user_pages_fast() for the range, or
++ * we need to fall back to the slow version:
++ */
++bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
++{
++      unsigned long len, end;
++
++      len = (unsigned long) nr_pages << PAGE_SHIFT;
++      end = start + len;
++      return end >= start;
++}
++#endif
++
+ /*
+  * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
+  * the regular GUP. It will only return non-negative values.
+@@ -1625,10 +1666,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
+ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+                         struct page **pages)
+ {
+-      struct mm_struct *mm = current->mm;
+       unsigned long addr, len, end;
+-      unsigned long next, flags;
+-      pgd_t *pgdp;
++      unsigned long flags;
+       int nr = 0;
+ 
+       start &= PAGE_MASK;
+@@ -1652,45 +1691,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+        * block IPIs that come from THPs splitting.
+        */
+ 
+-      local_irq_save(flags);
+-      pgdp = pgd_offset(mm, addr);
+-      do {
+-              pgd_t pgd = READ_ONCE(*pgdp);
+-
+-              next = pgd_addr_end(addr, end);
+-              if (pgd_none(pgd))
+-                      break;
+-              if (unlikely(pgd_huge(pgd))) {
+-                      if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+-                                        pages, &nr))
+-                              break;
+-              } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+-                      if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+-                                       PGDIR_SHIFT, next, write, pages, &nr))
+-                              break;
+-              } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
+-                      break;
+-      } while (pgdp++, addr = next, addr != end);
+-      local_irq_restore(flags);
++      if (gup_fast_permitted(start, nr_pages, write)) {
++              local_irq_save(flags);
++              gup_pgd_range(addr, end, write, pages, &nr);
++              local_irq_restore(flags);
++      }
+ 
+       return nr;
+ }
+ 
+-#ifndef gup_fast_permitted
+-/*
+- * Check if it's allowed to use __get_user_pages_fast() for the range, or
+- * we need to fall back to the slow version:
+- */
+-bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+-{
+-      unsigned long len, end;
+-
+-      len = (unsigned long) nr_pages << PAGE_SHIFT;
+-      end = start + len;
+-      return end >= start;
+-}
+-#endif
+-
+ /**
+  * get_user_pages_fast() - pin user pages in memory
+  * @start:    starting user address
+@@ -1710,12 +1719,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+                       struct page **pages)
+ {
++      unsigned long addr, len, end;
+       int nr = 0, ret = 0;
+ 
+       start &= PAGE_MASK;
++      addr = start;
++      len = (unsigned long) nr_pages << PAGE_SHIFT;
++      end = start + len;
++
++      if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
++                                      (void __user *)start, len)))
++              return 0;
+ 
+       if (gup_fast_permitted(start, nr_pages, write)) {
+-              nr = __get_user_pages_fast(start, nr_pages, write, pages);
++              local_irq_disable();
++              gup_pgd_range(addr, end, write, pages, &nr);
++              local_irq_enable();
+               ret = nr;
+       }
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0053-x86-asm-Remove-unnecessary-n-t-in-front-of-CC_SET-fr.patch b/patches/kernel/0053-x86-asm-Remove-unnecessary-n-t-in-front-of-CC_SET-fr.patch

deleted file mode 100644 (file)

index 182565e..0000000
--- a/patches/kernel/0053-x86-asm-Remove-unnecessary-n-t-in-front-of-CC_SET-fr.patch
+++ /dev/null
@@ -1,149 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Uros Bizjak <ubizjak@gmail.com>
-Date: Wed, 6 Sep 2017 17:18:08 +0200
-Subject: [PATCH] x86/asm: Remove unnecessary \n\t in front of CC_SET() from
- asm templates
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-There is no need for \n\t in front of CC_SET(), as the macro already includes these two.
-
-Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/20170906151808.5634-1-ubizjak@gmail.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit 3c52b5c64326d9dcfee4e10611c53ec1b1b20675)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 1c3f29ec5586e3aecfde2c6f83b8786e1aecd9ac)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/archrandom.h |  8 ++++----
- arch/x86/include/asm/bitops.h     | 10 +++++-----
- arch/x86/include/asm/percpu.h     |  2 +-
- arch/x86/include/asm/rmwcc.h      |  2 +-
- 4 files changed, 11 insertions(+), 11 deletions(-)
-
-diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
-index 5b0579abb398..3ac991d81e74 100644
---- a/arch/x86/include/asm/archrandom.h
-+++ b/arch/x86/include/asm/archrandom.h
-@@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v)
-       bool ok;
-       unsigned int retry = RDRAND_RETRY_LOOPS;
-       do {
--              asm volatile(RDRAND_LONG "\n\t"
-+              asm volatile(RDRAND_LONG
-                            CC_SET(c)
-                            : CC_OUT(c) (ok), "=a" (*v));
-               if (ok)
-@@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v)
-       bool ok;
-       unsigned int retry = RDRAND_RETRY_LOOPS;
-       do {
--              asm volatile(RDRAND_INT "\n\t"
-+              asm volatile(RDRAND_INT
-                            CC_SET(c)
-                            : CC_OUT(c) (ok), "=a" (*v));
-               if (ok)
-@@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v)
- static inline bool rdseed_long(unsigned long *v)
- {
-       bool ok;
--      asm volatile(RDSEED_LONG "\n\t"
-+      asm volatile(RDSEED_LONG
-                    CC_SET(c)
-                    : CC_OUT(c) (ok), "=a" (*v));
-       return ok;
-@@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v)
- static inline bool rdseed_int(unsigned int *v)
- {
-       bool ok;
--      asm volatile(RDSEED_INT "\n\t"
-+      asm volatile(RDSEED_INT
-                    CC_SET(c)
-                    : CC_OUT(c) (ok), "=a" (*v));
-       return ok;
-diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
-index 854022772c5b..8cee8db6dffb 100644
---- a/arch/x86/include/asm/bitops.h
-+++ b/arch/x86/include/asm/bitops.h
-@@ -142,7 +142,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
- static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
- {
-       bool negative;
--      asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
-+      asm volatile(LOCK_PREFIX "andb %2,%1"
-               CC_SET(s)
-               : CC_OUT(s) (negative), ADDR
-               : "ir" ((char) ~(1 << nr)) : "memory");
-@@ -245,7 +245,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
- {
-       bool oldbit;
- 
--      asm("bts %2,%1\n\t"
-+      asm("bts %2,%1"
-           CC_SET(c)
-           : CC_OUT(c) (oldbit), ADDR
-           : "Ir" (nr));
-@@ -285,7 +285,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
- {
-       bool oldbit;
- 
--      asm volatile("btr %2,%1\n\t"
-+      asm volatile("btr %2,%1"
-                    CC_SET(c)
-                    : CC_OUT(c) (oldbit), ADDR
-                    : "Ir" (nr));
-@@ -297,7 +297,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
- {
-       bool oldbit;
- 
--      asm volatile("btc %2,%1\n\t"
-+      asm volatile("btc %2,%1"
-                    CC_SET(c)
-                    : CC_OUT(c) (oldbit), ADDR
-                    : "Ir" (nr) : "memory");
-@@ -328,7 +328,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
- {
-       bool oldbit;
- 
--      asm volatile("bt %2,%1\n\t"
-+      asm volatile("bt %2,%1"
-                    CC_SET(c)
-                    : CC_OUT(c) (oldbit)
-                    : "m" (*(unsigned long *)addr), "Ir" (nr));
-diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
-index 9fa03604b2b3..b21a475fd7ed 100644
---- a/arch/x86/include/asm/percpu.h
-+++ b/arch/x86/include/asm/percpu.h
-@@ -525,7 +525,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
- {
-       bool oldbit;
- 
--      asm volatile("bt "__percpu_arg(2)",%1\n\t"
-+      asm volatile("bt "__percpu_arg(2)",%1"
-                       CC_SET(c)
-                       : CC_OUT(c) (oldbit)
-                       : "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
-diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
-index 661dd305694a..dd7ba5aa8dca 100644
---- a/arch/x86/include/asm/rmwcc.h
-+++ b/arch/x86/include/asm/rmwcc.h
-@@ -28,7 +28,7 @@ cc_label:                                                            \
- #define __GEN_RMWcc(fullop, var, cc, ...)                             \
- do {                                                                  \
-       bool c;                                                         \
--      asm volatile (fullop ";" CC_SET(cc)                             \
-+      asm volatile (fullop CC_SET(cc)                                 \
-                       : "+m" (var), CC_OUT(cc) (c)                    \
-                       : __VA_ARGS__ : "memory");                      \
-       return c;                                                       \
--- 
-2.14.2
-
diff --git a/patches/kernel/0054-objtool-Don-t-report-end-of-section-error-after-an-e.patch b/patches/kernel/0054-objtool-Don-t-report-end-of-section-error-after-an-e.patch

deleted file mode 100644 (file)

index 67c73f2..0000000
--- a/patches/kernel/0054-objtool-Don-t-report-end-of-section-error-after-an-e.patch
+++ /dev/null
@@ -1,58 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Mon, 18 Sep 2017 21:43:30 -0500
-Subject: [PATCH] objtool: Don't report end of section error after an empty
- unwind hint
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-If asm code specifies an UNWIND_HINT_EMPTY hint, don't warn if the
-section ends unexpectedly.  This can happen with the xen-head.S code
-because the hypercall_page is "text" but it's all zeros.
-
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/ddafe199dd8797e40e3c2777373347eba1d65572.1505764066.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 00d96180dc38ef872ac471c2d3e14b067cbd895d)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 9d22f903bba24f2ac86de8a81dc1788f9957aca8)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- tools/objtool/check.c | 7 +++++--
- 1 file changed, 5 insertions(+), 2 deletions(-)
-
-diff --git a/tools/objtool/check.c b/tools/objtool/check.c
-index 368275de5f23..0a86fd0ac082 100644
---- a/tools/objtool/check.c
-+++ b/tools/objtool/check.c
-@@ -1652,11 +1652,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
-               if (insn->dead_end)
-                       return 0;
- 
--              insn = next_insn;
--              if (!insn) {
-+              if (!next_insn) {
-+                      if (state.cfa.base == CFI_UNDEFINED)
-+                              return 0;
-                       WARN("%s: unexpected end of section", sec->name);
-                       return 1;
-               }
-+
-+              insn = next_insn;
-       }
- 
-       return 0;
--- 
-2.14.2
-
diff --git a/patches/kernel/0054-x86-asm-Remove-unnecessary-n-t-in-front-of-CC_SET-fr.patch b/patches/kernel/0054-x86-asm-Remove-unnecessary-n-t-in-front-of-CC_SET-fr.patch

new file mode 100644 (file)

index 0000000..182565e
--- /dev/null
+++ b/patches/kernel/0054-x86-asm-Remove-unnecessary-n-t-in-front-of-CC_SET-fr.patch
@@ -0,0 +1,149 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Uros Bizjak <ubizjak@gmail.com>
+Date: Wed, 6 Sep 2017 17:18:08 +0200
+Subject: [PATCH] x86/asm: Remove unnecessary \n\t in front of CC_SET() from
+ asm templates
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+There is no need for \n\t in front of CC_SET(), as the macro already includes these two.
+
+Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20170906151808.5634-1-ubizjak@gmail.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit 3c52b5c64326d9dcfee4e10611c53ec1b1b20675)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 1c3f29ec5586e3aecfde2c6f83b8786e1aecd9ac)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/archrandom.h |  8 ++++----
+ arch/x86/include/asm/bitops.h     | 10 +++++-----
+ arch/x86/include/asm/percpu.h     |  2 +-
+ arch/x86/include/asm/rmwcc.h      |  2 +-
+ 4 files changed, 11 insertions(+), 11 deletions(-)
+
+diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
+index 5b0579abb398..3ac991d81e74 100644
+--- a/arch/x86/include/asm/archrandom.h
++++ b/arch/x86/include/asm/archrandom.h
+@@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v)
+       bool ok;
+       unsigned int retry = RDRAND_RETRY_LOOPS;
+       do {
+-              asm volatile(RDRAND_LONG "\n\t"
++              asm volatile(RDRAND_LONG
+                            CC_SET(c)
+                            : CC_OUT(c) (ok), "=a" (*v));
+               if (ok)
+@@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v)
+       bool ok;
+       unsigned int retry = RDRAND_RETRY_LOOPS;
+       do {
+-              asm volatile(RDRAND_INT "\n\t"
++              asm volatile(RDRAND_INT
+                            CC_SET(c)
+                            : CC_OUT(c) (ok), "=a" (*v));
+               if (ok)
+@@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v)
+ static inline bool rdseed_long(unsigned long *v)
+ {
+       bool ok;
+-      asm volatile(RDSEED_LONG "\n\t"
++      asm volatile(RDSEED_LONG
+                    CC_SET(c)
+                    : CC_OUT(c) (ok), "=a" (*v));
+       return ok;
+@@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v)
+ static inline bool rdseed_int(unsigned int *v)
+ {
+       bool ok;
+-      asm volatile(RDSEED_INT "\n\t"
++      asm volatile(RDSEED_INT
+                    CC_SET(c)
+                    : CC_OUT(c) (ok), "=a" (*v));
+       return ok;
+diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
+index 854022772c5b..8cee8db6dffb 100644
+--- a/arch/x86/include/asm/bitops.h
++++ b/arch/x86/include/asm/bitops.h
+@@ -142,7 +142,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
+ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
+ {
+       bool negative;
+-      asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
++      asm volatile(LOCK_PREFIX "andb %2,%1"
+               CC_SET(s)
+               : CC_OUT(s) (negative), ADDR
+               : "ir" ((char) ~(1 << nr)) : "memory");
+@@ -245,7 +245,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
+ {
+       bool oldbit;
+ 
+-      asm("bts %2,%1\n\t"
++      asm("bts %2,%1"
+           CC_SET(c)
+           : CC_OUT(c) (oldbit), ADDR
+           : "Ir" (nr));
+@@ -285,7 +285,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
+ {
+       bool oldbit;
+ 
+-      asm volatile("btr %2,%1\n\t"
++      asm volatile("btr %2,%1"
+                    CC_SET(c)
+                    : CC_OUT(c) (oldbit), ADDR
+                    : "Ir" (nr));
+@@ -297,7 +297,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
+ {
+       bool oldbit;
+ 
+-      asm volatile("btc %2,%1\n\t"
++      asm volatile("btc %2,%1"
+                    CC_SET(c)
+                    : CC_OUT(c) (oldbit), ADDR
+                    : "Ir" (nr) : "memory");
+@@ -328,7 +328,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
+ {
+       bool oldbit;
+ 
+-      asm volatile("bt %2,%1\n\t"
++      asm volatile("bt %2,%1"
+                    CC_SET(c)
+                    : CC_OUT(c) (oldbit)
+                    : "m" (*(unsigned long *)addr), "Ir" (nr));
+diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
+index 9fa03604b2b3..b21a475fd7ed 100644
+--- a/arch/x86/include/asm/percpu.h
++++ b/arch/x86/include/asm/percpu.h
+@@ -525,7 +525,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
+ {
+       bool oldbit;
+ 
+-      asm volatile("bt "__percpu_arg(2)",%1\n\t"
++      asm volatile("bt "__percpu_arg(2)",%1"
+                       CC_SET(c)
+                       : CC_OUT(c) (oldbit)
+                       : "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
+diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
+index 661dd305694a..dd7ba5aa8dca 100644
+--- a/arch/x86/include/asm/rmwcc.h
++++ b/arch/x86/include/asm/rmwcc.h
+@@ -28,7 +28,7 @@ cc_label:                                                            \
+ #define __GEN_RMWcc(fullop, var, cc, ...)                             \
+ do {                                                                  \
+       bool c;                                                         \
+-      asm volatile (fullop ";" CC_SET(cc)                             \
++      asm volatile (fullop CC_SET(cc)                                 \
+                       : "+m" (var), CC_OUT(cc) (c)                    \
+                       : __VA_ARGS__ : "memory");                      \
+       return c;                                                       \
+-- 
+2.14.2
+
diff --git a/patches/kernel/0055-objtool-Don-t-report-end-of-section-error-after-an-e.patch b/patches/kernel/0055-objtool-Don-t-report-end-of-section-error-after-an-e.patch

new file mode 100644 (file)

index 0000000..67c73f2
--- /dev/null
+++ b/patches/kernel/0055-objtool-Don-t-report-end-of-section-error-after-an-e.patch
@@ -0,0 +1,58 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 18 Sep 2017 21:43:30 -0500
+Subject: [PATCH] objtool: Don't report end of section error after an empty
+ unwind hint
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+If asm code specifies an UNWIND_HINT_EMPTY hint, don't warn if the
+section ends unexpectedly.  This can happen with the xen-head.S code
+because the hypercall_page is "text" but it's all zeros.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/ddafe199dd8797e40e3c2777373347eba1d65572.1505764066.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 00d96180dc38ef872ac471c2d3e14b067cbd895d)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 9d22f903bba24f2ac86de8a81dc1788f9957aca8)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ tools/objtool/check.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/tools/objtool/check.c b/tools/objtool/check.c
+index 368275de5f23..0a86fd0ac082 100644
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -1652,11 +1652,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
+               if (insn->dead_end)
+                       return 0;
+ 
+-              insn = next_insn;
+-              if (!insn) {
++              if (!next_insn) {
++                      if (state.cfa.base == CFI_UNDEFINED)
++                              return 0;
+                       WARN("%s: unexpected end of section", sec->name);
+                       return 1;
+               }
++
++              insn = next_insn;
+       }
+ 
+       return 0;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0055-x86-head-Remove-confusing-comment.patch b/patches/kernel/0055-x86-head-Remove-confusing-comment.patch

deleted file mode 100644 (file)

index 82fe715..0000000
--- a/patches/kernel/0055-x86-head-Remove-confusing-comment.patch
+++ /dev/null
@@ -1,54 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Mon, 18 Sep 2017 21:43:31 -0500
-Subject: [PATCH] x86/head: Remove confusing comment
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This comment is actively wrong and confusing.  It refers to the
-registers' stack offsets after the pt_regs has been constructed on the
-stack, but this code is *before* that.
-
-At this point the stack just has the standard iret frame, for which no
-comment should be needed.
-
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/a3c267b770fc56c9b86df9c11c552848248aace2.1505764066.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 17270717e80de33a884ad328fea5f407d87f6d6a)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 49187e0108184688304260a75d29b789f36f3a2b)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/head_64.S | 4 ----
- 1 file changed, 4 deletions(-)
-
-diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
-index 6225550883df..627c798b2f15 100644
---- a/arch/x86/kernel/head_64.S
-+++ b/arch/x86/kernel/head_64.S
-@@ -254,10 +254,6 @@ bad_address:
- 
-       __INIT
- ENTRY(early_idt_handler_array)
--      # 104(%rsp) %rflags
--      #  96(%rsp) %cs
--      #  88(%rsp) %rip
--      #  80(%rsp) error code
-       i = 0
-       .rept NUM_EXCEPTION_VECTORS
-       .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
--- 
-2.14.2
-
diff --git a/patches/kernel/0056-x86-head-Remove-confusing-comment.patch b/patches/kernel/0056-x86-head-Remove-confusing-comment.patch

new file mode 100644 (file)

index 0000000..82fe715
--- /dev/null
+++ b/patches/kernel/0056-x86-head-Remove-confusing-comment.patch
@@ -0,0 +1,54 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 18 Sep 2017 21:43:31 -0500
+Subject: [PATCH] x86/head: Remove confusing comment
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This comment is actively wrong and confusing.  It refers to the
+registers' stack offsets after the pt_regs has been constructed on the
+stack, but this code is *before* that.
+
+At this point the stack just has the standard iret frame, for which no
+comment should be needed.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/a3c267b770fc56c9b86df9c11c552848248aace2.1505764066.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 17270717e80de33a884ad328fea5f407d87f6d6a)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 49187e0108184688304260a75d29b789f36f3a2b)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/head_64.S | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
+index 6225550883df..627c798b2f15 100644
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -254,10 +254,6 @@ bad_address:
+ 
+       __INIT
+ ENTRY(early_idt_handler_array)
+-      # 104(%rsp) %rflags
+-      #  96(%rsp) %cs
+-      #  88(%rsp) %rip
+-      #  80(%rsp) error code
+       i = 0
+       .rept NUM_EXCEPTION_VECTORS
+       .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
+-- 
+2.14.2
+
diff --git a/patches/kernel/0056-x86-head-Remove-unused-bad_address-code.patch b/patches/kernel/0056-x86-head-Remove-unused-bad_address-code.patch

deleted file mode 100644 (file)

index 62a5ad6..0000000
--- a/patches/kernel/0056-x86-head-Remove-unused-bad_address-code.patch
+++ /dev/null
@@ -1,48 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Mon, 18 Sep 2017 21:43:32 -0500
-Subject: [PATCH] x86/head: Remove unused 'bad_address' code
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-It's no longer possible for this code to be executed, so remove it.
-
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/32a46fe92d2083700599b36872b26e7dfd7b7965.1505764066.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit a8b88e84d124bc92c4808e72b8b8c0e0bb538630)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d790ff35a3a49ef0942a3484f024551433fd2ddf)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/head_64.S | 3 ---
- 1 file changed, 3 deletions(-)
-
-diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
-index 627c798b2f15..37d9905d38d6 100644
---- a/arch/x86/kernel/head_64.S
-+++ b/arch/x86/kernel/head_64.S
-@@ -249,9 +249,6 @@ ENDPROC(start_cpu0)
-       .quad  init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
-       __FINITDATA
- 
--bad_address:
--      jmp bad_address
--
-       __INIT
- ENTRY(early_idt_handler_array)
-       i = 0
--- 
-2.14.2
-
diff --git a/patches/kernel/0057-x86-head-Fix-head-ELF-function-annotations.patch b/patches/kernel/0057-x86-head-Fix-head-ELF-function-annotations.patch

deleted file mode 100644 (file)

index abf72c8..0000000
--- a/patches/kernel/0057-x86-head-Fix-head-ELF-function-annotations.patch
+++ /dev/null
@@ -1,66 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Mon, 18 Sep 2017 21:43:33 -0500
-Subject: [PATCH] x86/head: Fix head ELF function annotations
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-These functions aren't callable C-type functions, so don't annotate them
-as such.
-
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/36eb182738c28514f8bf95e403d89b6413a88883.1505764066.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 015a2ea5478680fc5216d56b7ff306f2a74efaf9)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 707517a56928fed1c03eefdb4e00fa57dfddc4fd)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/head_64.S | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
-index 37d9905d38d6..45b18b1a6417 100644
---- a/arch/x86/kernel/head_64.S
-+++ b/arch/x86/kernel/head_64.S
-@@ -218,7 +218,7 @@ ENTRY(secondary_startup_64)
-       pushq   %rax            # target address in negative space
-       lretq
- .Lafter_lret:
--ENDPROC(secondary_startup_64)
-+END(secondary_startup_64)
- 
- #include "verify_cpu.S"
- 
-@@ -261,7 +261,7 @@ ENTRY(early_idt_handler_array)
-       i = i + 1
-       .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
-       .endr
--ENDPROC(early_idt_handler_array)
-+END(early_idt_handler_array)
- 
- early_idt_handler_common:
-       /*
-@@ -304,7 +304,7 @@ early_idt_handler_common:
- 20:
-       decl early_recursion_flag(%rip)
-       jmp restore_regs_and_iret
--ENDPROC(early_idt_handler_common)
-+END(early_idt_handler_common)
- 
-       __INITDATA
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0057-x86-head-Remove-unused-bad_address-code.patch b/patches/kernel/0057-x86-head-Remove-unused-bad_address-code.patch

new file mode 100644 (file)

index 0000000..62a5ad6
--- /dev/null
+++ b/patches/kernel/0057-x86-head-Remove-unused-bad_address-code.patch
@@ -0,0 +1,48 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 18 Sep 2017 21:43:32 -0500
+Subject: [PATCH] x86/head: Remove unused 'bad_address' code
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+It's no longer possible for this code to be executed, so remove it.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/32a46fe92d2083700599b36872b26e7dfd7b7965.1505764066.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit a8b88e84d124bc92c4808e72b8b8c0e0bb538630)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d790ff35a3a49ef0942a3484f024551433fd2ddf)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/head_64.S | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
+index 627c798b2f15..37d9905d38d6 100644
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -249,9 +249,6 @@ ENDPROC(start_cpu0)
+       .quad  init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
+       __FINITDATA
+ 
+-bad_address:
+-      jmp bad_address
+-
+       __INIT
+ ENTRY(early_idt_handler_array)
+       i = 0
+-- 
+2.14.2
+
diff --git a/patches/kernel/0058-x86-boot-Annotate-verify_cpu-as-a-callable-function.patch b/patches/kernel/0058-x86-boot-Annotate-verify_cpu-as-a-callable-function.patch

deleted file mode 100644 (file)

index 23456b4..0000000
--- a/patches/kernel/0058-x86-boot-Annotate-verify_cpu-as-a-callable-function.patch
+++ /dev/null
@@ -1,52 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Mon, 18 Sep 2017 21:43:34 -0500
-Subject: [PATCH] x86/boot: Annotate verify_cpu() as a callable function
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-verify_cpu() is a callable function.  Annotate it as such.
-
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/293024b8a080832075312f38c07ccc970fc70292.1505764066.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit e93db75a0054b23a874a12c63376753544f3fe9e)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 48a432c46026f864e194cdf9a8133e7c9109274e)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/verify_cpu.S | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
-index 014ea59aa153..3d3c2f71f617 100644
---- a/arch/x86/kernel/verify_cpu.S
-+++ b/arch/x86/kernel/verify_cpu.S
-@@ -33,7 +33,7 @@
- #include <asm/cpufeatures.h>
- #include <asm/msr-index.h>
- 
--verify_cpu:
-+ENTRY(verify_cpu)
-       pushf                           # Save caller passed flags
-       push    $0                      # Kill any dangerous flags
-       popf
-@@ -139,3 +139,4 @@ verify_cpu:
-       popf                            # Restore caller passed flags
-       xorl %eax, %eax
-       ret
-+ENDPROC(verify_cpu)
--- 
-2.14.2
-
diff --git a/patches/kernel/0058-x86-head-Fix-head-ELF-function-annotations.patch b/patches/kernel/0058-x86-head-Fix-head-ELF-function-annotations.patch

new file mode 100644 (file)

index 0000000..abf72c8
--- /dev/null
+++ b/patches/kernel/0058-x86-head-Fix-head-ELF-function-annotations.patch
@@ -0,0 +1,66 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 18 Sep 2017 21:43:33 -0500
+Subject: [PATCH] x86/head: Fix head ELF function annotations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+These functions aren't callable C-type functions, so don't annotate them
+as such.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/36eb182738c28514f8bf95e403d89b6413a88883.1505764066.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 015a2ea5478680fc5216d56b7ff306f2a74efaf9)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 707517a56928fed1c03eefdb4e00fa57dfddc4fd)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/head_64.S | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
+index 37d9905d38d6..45b18b1a6417 100644
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -218,7 +218,7 @@ ENTRY(secondary_startup_64)
+       pushq   %rax            # target address in negative space
+       lretq
+ .Lafter_lret:
+-ENDPROC(secondary_startup_64)
++END(secondary_startup_64)
+ 
+ #include "verify_cpu.S"
+ 
+@@ -261,7 +261,7 @@ ENTRY(early_idt_handler_array)
+       i = i + 1
+       .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
+       .endr
+-ENDPROC(early_idt_handler_array)
++END(early_idt_handler_array)
+ 
+ early_idt_handler_common:
+       /*
+@@ -304,7 +304,7 @@ early_idt_handler_common:
+ 20:
+       decl early_recursion_flag(%rip)
+       jmp restore_regs_and_iret
+-ENDPROC(early_idt_handler_common)
++END(early_idt_handler_common)
+ 
+       __INITDATA
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0059-x86-boot-Annotate-verify_cpu-as-a-callable-function.patch b/patches/kernel/0059-x86-boot-Annotate-verify_cpu-as-a-callable-function.patch

new file mode 100644 (file)

index 0000000..23456b4
--- /dev/null
+++ b/patches/kernel/0059-x86-boot-Annotate-verify_cpu-as-a-callable-function.patch
@@ -0,0 +1,52 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 18 Sep 2017 21:43:34 -0500
+Subject: [PATCH] x86/boot: Annotate verify_cpu() as a callable function
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+verify_cpu() is a callable function.  Annotate it as such.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/293024b8a080832075312f38c07ccc970fc70292.1505764066.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit e93db75a0054b23a874a12c63376753544f3fe9e)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 48a432c46026f864e194cdf9a8133e7c9109274e)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/verify_cpu.S | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
+index 014ea59aa153..3d3c2f71f617 100644
+--- a/arch/x86/kernel/verify_cpu.S
++++ b/arch/x86/kernel/verify_cpu.S
+@@ -33,7 +33,7 @@
+ #include <asm/cpufeatures.h>
+ #include <asm/msr-index.h>
+ 
+-verify_cpu:
++ENTRY(verify_cpu)
+       pushf                           # Save caller passed flags
+       push    $0                      # Kill any dangerous flags
+       popf
+@@ -139,3 +139,4 @@ verify_cpu:
+       popf                            # Restore caller passed flags
+       xorl %eax, %eax
+       ret
++ENDPROC(verify_cpu)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0059-x86-xen-Fix-xen-head-ELF-annotations.patch b/patches/kernel/0059-x86-xen-Fix-xen-head-ELF-annotations.patch

deleted file mode 100644 (file)

index d261ae4..0000000
--- a/patches/kernel/0059-x86-xen-Fix-xen-head-ELF-annotations.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Mon, 18 Sep 2017 21:43:35 -0500
-Subject: [PATCH] x86/xen: Fix xen head ELF annotations
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Mark the ends of the startup_xen and hypercall_page code sections.
-
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/3a80a394d30af43d9cefa1a29628c45ed8420c97.1505764066.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 2582d3df95c76d3b686453baf90b64d57e87d1e8)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit b9410861f1436c1e38958a9b85009ad252aad9f5)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/xen/xen-head.S | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
-index 72a8e6adebe6..2f0cff2cc265 100644
---- a/arch/x86/xen/xen-head.S
-+++ b/arch/x86/xen/xen-head.S
-@@ -33,7 +33,7 @@ ENTRY(startup_xen)
-       mov $init_thread_union+THREAD_SIZE, %_ASM_SP
- 
-       jmp xen_start_kernel
--
-+END(startup_xen)
-       __FINIT
- #endif
- 
-@@ -47,7 +47,7 @@ ENTRY(hypercall_page)
-       .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
- #include <asm/xen-hypercalls.h>
- #undef HYPERCALL
--
-+END(hypercall_page)
- .popsection
- 
-       ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")
--- 
-2.14.2
-
diff --git a/patches/kernel/0060-x86-xen-Add-unwind-hint-annotations.patch b/patches/kernel/0060-x86-xen-Add-unwind-hint-annotations.patch

deleted file mode 100644 (file)

index ce10c49..0000000
--- a/patches/kernel/0060-x86-xen-Add-unwind-hint-annotations.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Mon, 18 Sep 2017 21:43:36 -0500
-Subject: [PATCH] x86/xen: Add unwind hint annotations
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Add unwind hint annotations to the xen head code so the ORC unwinder can
-read head_64.o.
-
-hypercall_page needs empty annotations at 32-byte intervals to match the
-'xen_hypercall_*' ELF functions at those locations.
-
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/70ed2eb516fe9266be766d953f93c2571bca88cc.1505764066.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit abbe1cac6214d81d2f4e149aba64a8760703144e)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 9f099a90cb39eaff9b3187e8a6d8151c8af53db1)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/xen/xen-head.S | 7 ++++++-
- 1 file changed, 6 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
-index 2f0cff2cc265..ad189ab2c329 100644
---- a/arch/x86/xen/xen-head.S
-+++ b/arch/x86/xen/xen-head.S
-@@ -9,6 +9,7 @@
- #include <asm/boot.h>
- #include <asm/asm.h>
- #include <asm/page_types.h>
-+#include <asm/unwind_hints.h>
- 
- #include <xen/interface/elfnote.h>
- #include <xen/interface/features.h>
-@@ -19,6 +20,7 @@
- #ifdef CONFIG_XEN_PV
-       __INIT
- ENTRY(startup_xen)
-+      UNWIND_HINT_EMPTY
-       cld
- 
-       /* Clear .bss */
-@@ -40,7 +42,10 @@ END(startup_xen)
- .pushsection .text
-       .balign PAGE_SIZE
- ENTRY(hypercall_page)
--      .skip PAGE_SIZE
-+      .rept (PAGE_SIZE / 32)
-+              UNWIND_HINT_EMPTY
-+              .skip 32
-+      .endr
- 
- #define HYPERCALL(n) \
-       .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
--- 
-2.14.2
-
diff --git a/patches/kernel/0060-x86-xen-Fix-xen-head-ELF-annotations.patch b/patches/kernel/0060-x86-xen-Fix-xen-head-ELF-annotations.patch

new file mode 100644 (file)

index 0000000..d261ae4
--- /dev/null
+++ b/patches/kernel/0060-x86-xen-Fix-xen-head-ELF-annotations.patch
@@ -0,0 +1,56 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 18 Sep 2017 21:43:35 -0500
+Subject: [PATCH] x86/xen: Fix xen head ELF annotations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Mark the ends of the startup_xen and hypercall_page code sections.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/3a80a394d30af43d9cefa1a29628c45ed8420c97.1505764066.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 2582d3df95c76d3b686453baf90b64d57e87d1e8)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit b9410861f1436c1e38958a9b85009ad252aad9f5)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/xen/xen-head.S | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
+index 72a8e6adebe6..2f0cff2cc265 100644
+--- a/arch/x86/xen/xen-head.S
++++ b/arch/x86/xen/xen-head.S
+@@ -33,7 +33,7 @@ ENTRY(startup_xen)
+       mov $init_thread_union+THREAD_SIZE, %_ASM_SP
+ 
+       jmp xen_start_kernel
+-
++END(startup_xen)
+       __FINIT
+ #endif
+ 
+@@ -47,7 +47,7 @@ ENTRY(hypercall_page)
+       .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
+ #include <asm/xen-hypercalls.h>
+ #undef HYPERCALL
+-
++END(hypercall_page)
+ .popsection
+ 
+       ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")
+-- 
+2.14.2
+
diff --git a/patches/kernel/0061-x86-head-Add-unwind-hint-annotations.patch b/patches/kernel/0061-x86-head-Add-unwind-hint-annotations.patch

deleted file mode 100644 (file)

index 9579011..0000000
--- a/patches/kernel/0061-x86-head-Add-unwind-hint-annotations.patch
+++ /dev/null
@@ -1,134 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Mon, 18 Sep 2017 21:43:37 -0500
-Subject: [PATCH] x86/head: Add unwind hint annotations
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Jiri Slaby reported an ORC issue when unwinding from an idle task.  The
-stack was:
-
-    ffffffff811083c2 do_idle+0x142/0x1e0
-    ffffffff8110861d cpu_startup_entry+0x5d/0x60
-    ffffffff82715f58 start_kernel+0x3ff/0x407
-    ffffffff827153e8 x86_64_start_kernel+0x14e/0x15d
-    ffffffff810001bf secondary_startup_64+0x9f/0xa0
-
-The ORC unwinder errored out at secondary_startup_64 because the head
-code isn't annotated yet so there wasn't a corresponding ORC entry.
-
-Fix that and any other head-related unwinding issues by adding unwind
-hints to the head code.
-
-Reported-by: Jiri Slaby <jslaby@suse.cz>
-Tested-by: Jiri Slaby <jslaby@suse.cz>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/78ef000a2f68f545d6eef44ee912edceaad82ccf.1505764066.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 2704fbb672d0d9a19414907fda7949283dcef6a1)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit b63a868e404e64172afefea553c6a40963a151db)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/Makefile  |  1 -
- arch/x86/kernel/head_64.S | 14 ++++++++++++--
- 2 files changed, 12 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
-index 287eac7d207f..e2315aecc441 100644
---- a/arch/x86/kernel/Makefile
-+++ b/arch/x86/kernel/Makefile
-@@ -26,7 +26,6 @@ KASAN_SANITIZE_dumpstack.o                           := n
- KASAN_SANITIZE_dumpstack_$(BITS).o                    := n
- KASAN_SANITIZE_stacktrace.o := n
- 
--OBJECT_FILES_NON_STANDARD_head_$(BITS).o              := y
- OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o   := y
- OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o            := y
- OBJECT_FILES_NON_STANDARD_test_nx.o                   := y
-diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
-index 45b18b1a6417..d081bc7a027d 100644
---- a/arch/x86/kernel/head_64.S
-+++ b/arch/x86/kernel/head_64.S
-@@ -49,6 +49,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
-       .code64
-       .globl startup_64
- startup_64:
-+      UNWIND_HINT_EMPTY
-       /*
-        * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
-        * and someone has loaded an identity mapped page table
-@@ -81,6 +82,7 @@ startup_64:
-       movq    $(early_top_pgt - __START_KERNEL_map), %rax
-       jmp 1f
- ENTRY(secondary_startup_64)
-+      UNWIND_HINT_EMPTY
-       /*
-        * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
-        * and someone has loaded a mapped page table.
-@@ -116,6 +118,7 @@ ENTRY(secondary_startup_64)
-       movq    $1f, %rax
-       jmp     *%rax
- 1:
-+      UNWIND_HINT_EMPTY
- 
-       /* Check if nx is implemented */
-       movl    $0x80000001, %eax
-@@ -230,6 +233,7 @@ END(secondary_startup_64)
-  */
- ENTRY(start_cpu0)
-       movq    initial_stack(%rip), %rsp
-+      UNWIND_HINT_EMPTY
-       jmp     .Ljump_to_C_code
- ENDPROC(start_cpu0)
- #endif
-@@ -254,13 +258,18 @@ ENTRY(early_idt_handler_array)
-       i = 0
-       .rept NUM_EXCEPTION_VECTORS
-       .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
--      pushq $0                # Dummy error code, to make stack frame uniform
-+              UNWIND_HINT_IRET_REGS
-+              pushq $0        # Dummy error code, to make stack frame uniform
-+      .else
-+              UNWIND_HINT_IRET_REGS offset=8
-       .endif
-       pushq $i                # 72(%rsp) Vector number
-       jmp early_idt_handler_common
-+      UNWIND_HINT_IRET_REGS
-       i = i + 1
-       .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
-       .endr
-+      UNWIND_HINT_IRET_REGS offset=16
- END(early_idt_handler_array)
- 
- early_idt_handler_common:
-@@ -289,6 +298,7 @@ early_idt_handler_common:
-       pushq %r13                              /* pt_regs->r13 */
-       pushq %r14                              /* pt_regs->r14 */
-       pushq %r15                              /* pt_regs->r15 */
-+      UNWIND_HINT_REGS
- 
-       cmpq $14,%rsi           /* Page fault? */
-       jnz 10f
-@@ -411,7 +421,7 @@ ENTRY(phys_base)
- EXPORT_SYMBOL(phys_base)
- 
- #include "../../x86/xen/xen-head.S"
--      
-+
-       __PAGE_ALIGNED_BSS
- NEXT_PAGE(empty_zero_page)
-       .skip PAGE_SIZE
--- 
-2.14.2
-
diff --git a/patches/kernel/0061-x86-xen-Add-unwind-hint-annotations.patch b/patches/kernel/0061-x86-xen-Add-unwind-hint-annotations.patch

new file mode 100644 (file)

index 0000000..ce10c49
--- /dev/null
+++ b/patches/kernel/0061-x86-xen-Add-unwind-hint-annotations.patch
@@ -0,0 +1,70 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 18 Sep 2017 21:43:36 -0500
+Subject: [PATCH] x86/xen: Add unwind hint annotations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Add unwind hint annotations to the xen head code so the ORC unwinder can
+read head_64.o.
+
+hypercall_page needs empty annotations at 32-byte intervals to match the
+'xen_hypercall_*' ELF functions at those locations.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/70ed2eb516fe9266be766d953f93c2571bca88cc.1505764066.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit abbe1cac6214d81d2f4e149aba64a8760703144e)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 9f099a90cb39eaff9b3187e8a6d8151c8af53db1)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/xen/xen-head.S | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
+index 2f0cff2cc265..ad189ab2c329 100644
+--- a/arch/x86/xen/xen-head.S
++++ b/arch/x86/xen/xen-head.S
+@@ -9,6 +9,7 @@
+ #include <asm/boot.h>
+ #include <asm/asm.h>
+ #include <asm/page_types.h>
++#include <asm/unwind_hints.h>
+ 
+ #include <xen/interface/elfnote.h>
+ #include <xen/interface/features.h>
+@@ -19,6 +20,7 @@
+ #ifdef CONFIG_XEN_PV
+       __INIT
+ ENTRY(startup_xen)
++      UNWIND_HINT_EMPTY
+       cld
+ 
+       /* Clear .bss */
+@@ -40,7 +42,10 @@ END(startup_xen)
+ .pushsection .text
+       .balign PAGE_SIZE
+ ENTRY(hypercall_page)
+-      .skip PAGE_SIZE
++      .rept (PAGE_SIZE / 32)
++              UNWIND_HINT_EMPTY
++              .skip 32
++      .endr
+ 
+ #define HYPERCALL(n) \
+       .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
+-- 
+2.14.2
+
diff --git a/patches/kernel/0062-ACPI-APEI-adjust-a-local-variable-type-in-ghes_iorem.patch b/patches/kernel/0062-ACPI-APEI-adjust-a-local-variable-type-in-ghes_iorem.patch

deleted file mode 100644 (file)

index 78d3cb4..0000000
--- a/patches/kernel/0062-ACPI-APEI-adjust-a-local-variable-type-in-ghes_iorem.patch
+++ /dev/null
@@ -1,43 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <JBeulich@suse.com>
-Date: Mon, 25 Sep 2017 02:06:19 -0600
-Subject: [PATCH] ACPI / APEI: adjust a local variable type in
- ghes_ioremap_pfn_irq()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Match up with what 7edda0886b ("acpi: apei: handle SEA notification
-type for ARMv8") did for ghes_ioremap_pfn_nmi().
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-(cherry picked from commit 095f613c6b386a1704b73a549e9ba66c1d5381ae)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 0a5c092882b0ead111dc3a6bbaa870665b54d796)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- drivers/acpi/apei/ghes.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
-index d661d452b238..3628078ee351 100644
---- a/drivers/acpi/apei/ghes.c
-+++ b/drivers/acpi/apei/ghes.c
-@@ -174,7 +174,8 @@ static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
- 
- static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
- {
--      unsigned long vaddr, paddr;
-+      unsigned long vaddr;
-+      phys_addr_t paddr;
-       pgprot_t prot;
- 
-       vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
--- 
-2.14.2
-
diff --git a/patches/kernel/0062-x86-head-Add-unwind-hint-annotations.patch b/patches/kernel/0062-x86-head-Add-unwind-hint-annotations.patch

new file mode 100644 (file)

index 0000000..9579011
--- /dev/null
+++ b/patches/kernel/0062-x86-head-Add-unwind-hint-annotations.patch
@@ -0,0 +1,134 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 18 Sep 2017 21:43:37 -0500
+Subject: [PATCH] x86/head: Add unwind hint annotations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Jiri Slaby reported an ORC issue when unwinding from an idle task.  The
+stack was:
+
+    ffffffff811083c2 do_idle+0x142/0x1e0
+    ffffffff8110861d cpu_startup_entry+0x5d/0x60
+    ffffffff82715f58 start_kernel+0x3ff/0x407
+    ffffffff827153e8 x86_64_start_kernel+0x14e/0x15d
+    ffffffff810001bf secondary_startup_64+0x9f/0xa0
+
+The ORC unwinder errored out at secondary_startup_64 because the head
+code isn't annotated yet so there wasn't a corresponding ORC entry.
+
+Fix that and any other head-related unwinding issues by adding unwind
+hints to the head code.
+
+Reported-by: Jiri Slaby <jslaby@suse.cz>
+Tested-by: Jiri Slaby <jslaby@suse.cz>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/78ef000a2f68f545d6eef44ee912edceaad82ccf.1505764066.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 2704fbb672d0d9a19414907fda7949283dcef6a1)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit b63a868e404e64172afefea553c6a40963a151db)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/Makefile  |  1 -
+ arch/x86/kernel/head_64.S | 14 ++++++++++++--
+ 2 files changed, 12 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
+index 287eac7d207f..e2315aecc441 100644
+--- a/arch/x86/kernel/Makefile
++++ b/arch/x86/kernel/Makefile
+@@ -26,7 +26,6 @@ KASAN_SANITIZE_dumpstack.o                           := n
+ KASAN_SANITIZE_dumpstack_$(BITS).o                    := n
+ KASAN_SANITIZE_stacktrace.o := n
+ 
+-OBJECT_FILES_NON_STANDARD_head_$(BITS).o              := y
+ OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o   := y
+ OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o            := y
+ OBJECT_FILES_NON_STANDARD_test_nx.o                   := y
+diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
+index 45b18b1a6417..d081bc7a027d 100644
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -49,6 +49,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
+       .code64
+       .globl startup_64
+ startup_64:
++      UNWIND_HINT_EMPTY
+       /*
+        * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
+        * and someone has loaded an identity mapped page table
+@@ -81,6 +82,7 @@ startup_64:
+       movq    $(early_top_pgt - __START_KERNEL_map), %rax
+       jmp 1f
+ ENTRY(secondary_startup_64)
++      UNWIND_HINT_EMPTY
+       /*
+        * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
+        * and someone has loaded a mapped page table.
+@@ -116,6 +118,7 @@ ENTRY(secondary_startup_64)
+       movq    $1f, %rax
+       jmp     *%rax
+ 1:
++      UNWIND_HINT_EMPTY
+ 
+       /* Check if nx is implemented */
+       movl    $0x80000001, %eax
+@@ -230,6 +233,7 @@ END(secondary_startup_64)
+  */
+ ENTRY(start_cpu0)
+       movq    initial_stack(%rip), %rsp
++      UNWIND_HINT_EMPTY
+       jmp     .Ljump_to_C_code
+ ENDPROC(start_cpu0)
+ #endif
+@@ -254,13 +258,18 @@ ENTRY(early_idt_handler_array)
+       i = 0
+       .rept NUM_EXCEPTION_VECTORS
+       .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
+-      pushq $0                # Dummy error code, to make stack frame uniform
++              UNWIND_HINT_IRET_REGS
++              pushq $0        # Dummy error code, to make stack frame uniform
++      .else
++              UNWIND_HINT_IRET_REGS offset=8
+       .endif
+       pushq $i                # 72(%rsp) Vector number
+       jmp early_idt_handler_common
++      UNWIND_HINT_IRET_REGS
+       i = i + 1
+       .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
+       .endr
++      UNWIND_HINT_IRET_REGS offset=16
+ END(early_idt_handler_array)
+ 
+ early_idt_handler_common:
+@@ -289,6 +298,7 @@ early_idt_handler_common:
+       pushq %r13                              /* pt_regs->r13 */
+       pushq %r14                              /* pt_regs->r14 */
+       pushq %r15                              /* pt_regs->r15 */
++      UNWIND_HINT_REGS
+ 
+       cmpq $14,%rsi           /* Page fault? */
+       jnz 10f
+@@ -411,7 +421,7 @@ ENTRY(phys_base)
+ EXPORT_SYMBOL(phys_base)
+ 
+ #include "../../x86/xen/xen-head.S"
+-      
++
+       __PAGE_ALIGNED_BSS
+ NEXT_PAGE(empty_zero_page)
+       .skip PAGE_SIZE
+-- 
+2.14.2
+
diff --git a/patches/kernel/0063-ACPI-APEI-adjust-a-local-variable-type-in-ghes_iorem.patch b/patches/kernel/0063-ACPI-APEI-adjust-a-local-variable-type-in-ghes_iorem.patch

new file mode 100644 (file)

index 0000000..78d3cb4
--- /dev/null
+++ b/patches/kernel/0063-ACPI-APEI-adjust-a-local-variable-type-in-ghes_iorem.patch
@@ -0,0 +1,43 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <JBeulich@suse.com>
+Date: Mon, 25 Sep 2017 02:06:19 -0600
+Subject: [PATCH] ACPI / APEI: adjust a local variable type in
+ ghes_ioremap_pfn_irq()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Match up with what 7edda0886b ("acpi: apei: handle SEA notification
+type for ARMv8") did for ghes_ioremap_pfn_nmi().
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+(cherry picked from commit 095f613c6b386a1704b73a549e9ba66c1d5381ae)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 0a5c092882b0ead111dc3a6bbaa870665b54d796)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ drivers/acpi/apei/ghes.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
+index d661d452b238..3628078ee351 100644
+--- a/drivers/acpi/apei/ghes.c
++++ b/drivers/acpi/apei/ghes.c
+@@ -174,7 +174,8 @@ static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
+ 
+ static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
+ {
+-      unsigned long vaddr, paddr;
++      unsigned long vaddr;
++      phys_addr_t paddr;
+       pgprot_t prot;
+ 
+       vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0063-x86-unwinder-Make-CONFIG_UNWINDER_ORC-y-the-default-.patch b/patches/kernel/0063-x86-unwinder-Make-CONFIG_UNWINDER_ORC-y-the-default-.patch

deleted file mode 100644 (file)

index a04c95d..0000000
--- a/patches/kernel/0063-x86-unwinder-Make-CONFIG_UNWINDER_ORC-y-the-default-.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Ingo Molnar <mingo@kernel.org>
-Date: Thu, 12 Oct 2017 09:24:30 +0200
-Subject: [PATCH] x86/unwinder: Make CONFIG_UNWINDER_ORC=y the default in the
- 64-bit defconfig
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Increase testing coverage by turning on the primary x86 unwinder for
-the 64-bit defconfig.
-
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-kernel@vger.kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 1e4078f0bba46ad61b69548abe6a6faf63b89380)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit ebcba768c005dce435721f6c998e3afdf5534666)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/configs/x86_64_defconfig | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
-index 4a4b16e56d35..eb65c248708d 100644
---- a/arch/x86/configs/x86_64_defconfig
-+++ b/arch/x86/configs/x86_64_defconfig
-@@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
- # CONFIG_DEBUG_RODATA_TEST is not set
- CONFIG_DEBUG_BOOT_PARAMS=y
- CONFIG_OPTIMIZE_INLINING=y
-+CONFIG_ORC_UNWINDER=y
- CONFIG_SECURITY=y
- CONFIG_SECURITY_NETWORK=y
- CONFIG_SECURITY_SELINUX=y
--- 
-2.14.2
-
diff --git a/patches/kernel/0064-x86-fpu-debug-Remove-unused-x86_fpu_state-and-x86_fp.patch b/patches/kernel/0064-x86-fpu-debug-Remove-unused-x86_fpu_state-and-x86_fp.patch

deleted file mode 100644 (file)

index be4f5ca..0000000
--- a/patches/kernel/0064-x86-fpu-debug-Remove-unused-x86_fpu_state-and-x86_fp.patch
+++ /dev/null
@@ -1,66 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
-Date: Thu, 12 Oct 2017 18:06:19 -0400
-Subject: [PATCH] x86/fpu/debug: Remove unused 'x86_fpu_state' and
- 'x86_fpu_deactivate_state' tracepoints
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Commit:
-
-  d1898b733619 ("x86/fpu: Add tracepoints to dump FPU state at key points")
-
-... added the 'x86_fpu_state' and 'x86_fpu_deactivate_state' trace points,
-but never used them. Today they are still not used. As they take up
-and waste memory, remove them.
-
-Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/20171012180619.670b68b6@gandalf.local.home
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 127a1bea40f7f2a36bc7207ea4d51bb6b4e936fa)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit c7c367ddb6ffb6af2cfee287960e97c4aefc6548)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/trace/fpu.h | 10 ----------
- 1 file changed, 10 deletions(-)
-
-diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
-index 342e59789fcd..fed7d9ecae60 100644
---- a/arch/x86/include/asm/trace/fpu.h
-+++ b/arch/x86/include/asm/trace/fpu.h
-@@ -36,11 +36,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
-       )
- );
- 
--DEFINE_EVENT(x86_fpu, x86_fpu_state,
--      TP_PROTO(struct fpu *fpu),
--      TP_ARGS(fpu)
--);
--
- DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
-       TP_PROTO(struct fpu *fpu),
-       TP_ARGS(fpu)
-@@ -76,11 +71,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
-       TP_ARGS(fpu)
- );
- 
--DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state,
--      TP_PROTO(struct fpu *fpu),
--      TP_ARGS(fpu)
--);
--
- DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
-       TP_PROTO(struct fpu *fpu),
-       TP_ARGS(fpu)
--- 
-2.14.2
-
diff --git a/patches/kernel/0064-x86-unwinder-Make-CONFIG_UNWINDER_ORC-y-the-default-.patch b/patches/kernel/0064-x86-unwinder-Make-CONFIG_UNWINDER_ORC-y-the-default-.patch

new file mode 100644 (file)

index 0000000..a04c95d
--- /dev/null
+++ b/patches/kernel/0064-x86-unwinder-Make-CONFIG_UNWINDER_ORC-y-the-default-.patch
@@ -0,0 +1,44 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Ingo Molnar <mingo@kernel.org>
+Date: Thu, 12 Oct 2017 09:24:30 +0200
+Subject: [PATCH] x86/unwinder: Make CONFIG_UNWINDER_ORC=y the default in the
+ 64-bit defconfig
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Increase testing coverage by turning on the primary x86 unwinder for
+the 64-bit defconfig.
+
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 1e4078f0bba46ad61b69548abe6a6faf63b89380)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit ebcba768c005dce435721f6c998e3afdf5534666)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/configs/x86_64_defconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
+index 4a4b16e56d35..eb65c248708d 100644
+--- a/arch/x86/configs/x86_64_defconfig
++++ b/arch/x86/configs/x86_64_defconfig
+@@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
+ # CONFIG_DEBUG_RODATA_TEST is not set
+ CONFIG_DEBUG_BOOT_PARAMS=y
+ CONFIG_OPTIMIZE_INLINING=y
++CONFIG_ORC_UNWINDER=y
+ CONFIG_SECURITY=y
+ CONFIG_SECURITY_NETWORK=y
+ CONFIG_SECURITY_SELINUX=y
+-- 
+2.14.2
+
diff --git a/patches/kernel/0065-x86-fpu-debug-Remove-unused-x86_fpu_state-and-x86_fp.patch b/patches/kernel/0065-x86-fpu-debug-Remove-unused-x86_fpu_state-and-x86_fp.patch

new file mode 100644 (file)

index 0000000..be4f5ca
--- /dev/null
+++ b/patches/kernel/0065-x86-fpu-debug-Remove-unused-x86_fpu_state-and-x86_fp.patch
@@ -0,0 +1,66 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Thu, 12 Oct 2017 18:06:19 -0400
+Subject: [PATCH] x86/fpu/debug: Remove unused 'x86_fpu_state' and
+ 'x86_fpu_deactivate_state' tracepoints
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Commit:
+
+  d1898b733619 ("x86/fpu: Add tracepoints to dump FPU state at key points")
+
+... added the 'x86_fpu_state' and 'x86_fpu_deactivate_state' trace points,
+but never used them. Today they are still not used. As they take up
+and waste memory, remove them.
+
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20171012180619.670b68b6@gandalf.local.home
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 127a1bea40f7f2a36bc7207ea4d51bb6b4e936fa)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit c7c367ddb6ffb6af2cfee287960e97c4aefc6548)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/trace/fpu.h | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
+index 342e59789fcd..fed7d9ecae60 100644
+--- a/arch/x86/include/asm/trace/fpu.h
++++ b/arch/x86/include/asm/trace/fpu.h
+@@ -36,11 +36,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
+       )
+ );
+ 
+-DEFINE_EVENT(x86_fpu, x86_fpu_state,
+-      TP_PROTO(struct fpu *fpu),
+-      TP_ARGS(fpu)
+-);
+-
+ DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
+       TP_PROTO(struct fpu *fpu),
+       TP_ARGS(fpu)
+@@ -76,11 +71,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
+       TP_ARGS(fpu)
+ );
+ 
+-DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state,
+-      TP_PROTO(struct fpu *fpu),
+-      TP_ARGS(fpu)
+-);
+-
+ DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
+       TP_PROTO(struct fpu *fpu),
+       TP_ARGS(fpu)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0065-x86-unwind-Rename-unwinder-config-options-to-CONFIG_.patch b/patches/kernel/0065-x86-unwind-Rename-unwinder-config-options-to-CONFIG_.patch

deleted file mode 100644 (file)

index 9689db5..0000000
--- a/patches/kernel/0065-x86-unwind-Rename-unwinder-config-options-to-CONFIG_.patch
+++ /dev/null
@@ -1,273 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Fri, 13 Oct 2017 15:02:00 -0500
-Subject: [PATCH] x86/unwind: Rename unwinder config options to
- 'CONFIG_UNWINDER_*'
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Rename the unwinder config options from:
-
-  CONFIG_ORC_UNWINDER
-  CONFIG_FRAME_POINTER_UNWINDER
-  CONFIG_GUESS_UNWINDER
-
-to:
-
-  CONFIG_UNWINDER_ORC
-  CONFIG_UNWINDER_FRAME_POINTER
-  CONFIG_UNWINDER_GUESS
-
-... in order to give them a more logical config namespace.
-
-Suggested-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/73972fc7e2762e91912c6b9584582703d6f1b8cc.1507924831.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 11af847446ed0d131cf24d16a7ef3d5ea7a49554)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 27ab2a240a797b073ce63385b1d5db06e44fc3ae)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/x86/orc-unwinder.txt |  2 +-
- Makefile                           |  4 ++--
- arch/x86/kernel/Makefile           |  6 +++---
- scripts/Makefile.build             |  2 +-
- arch/x86/include/asm/module.h      |  2 +-
- arch/x86/include/asm/unwind.h      |  8 ++++----
- include/asm-generic/vmlinux.lds.h  |  2 +-
- arch/x86/Kconfig                   |  2 +-
- arch/x86/Kconfig.debug             | 10 +++++-----
- arch/x86/configs/tiny.config       |  4 ++--
- arch/x86/configs/x86_64_defconfig  |  2 +-
- lib/Kconfig.debug                  |  2 +-
- 12 files changed, 23 insertions(+), 23 deletions(-)
-
-diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt
-index af0c9a4c65a6..cd4b29be29af 100644
---- a/Documentation/x86/orc-unwinder.txt
-+++ b/Documentation/x86/orc-unwinder.txt
-@@ -4,7 +4,7 @@ ORC unwinder
- Overview
- --------
- 
--The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is
-+The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
- similar in concept to a DWARF unwinder.  The difference is that the
- format of the ORC data is much simpler than DWARF, which in turn allows
- the ORC unwinder to be much simpler and faster.
-diff --git a/Makefile b/Makefile
-index 490ce18685ea..b740e3dc9ff8 100644
---- a/Makefile
-+++ b/Makefile
-@@ -965,8 +965,8 @@ ifdef CONFIG_STACK_VALIDATION
-   ifeq ($(has_libelf),1)
-     objtool_target := tools/objtool FORCE
-   else
--    ifdef CONFIG_ORC_UNWINDER
--      $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
-+    ifdef CONFIG_UNWINDER_ORC
-+      $(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
-     else
-       $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
-     endif
-diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
-index e2315aecc441..5bf0d5a473b4 100644
---- a/arch/x86/kernel/Makefile
-+++ b/arch/x86/kernel/Makefile
-@@ -125,9 +125,9 @@ obj-$(CONFIG_PERF_EVENTS)          += perf_regs.o
- obj-$(CONFIG_TRACING)                 += tracepoint.o
- obj-$(CONFIG_SCHED_MC_PRIO)           += itmt.o
- 
--obj-$(CONFIG_ORC_UNWINDER)            += unwind_orc.o
--obj-$(CONFIG_FRAME_POINTER_UNWINDER)  += unwind_frame.o
--obj-$(CONFIG_GUESS_UNWINDER)          += unwind_guess.o
-+obj-$(CONFIG_UNWINDER_ORC)            += unwind_orc.o
-+obj-$(CONFIG_UNWINDER_FRAME_POINTER)  += unwind_frame.o
-+obj-$(CONFIG_UNWINDER_GUESS)          += unwind_guess.o
- 
- ###
- # 64 bit specific files
-diff --git a/scripts/Makefile.build b/scripts/Makefile.build
-index ab2c8ef43cdb..436005392047 100644
---- a/scripts/Makefile.build
-+++ b/scripts/Makefile.build
-@@ -258,7 +258,7 @@ ifneq ($(SKIP_STACK_VALIDATION),1)
- 
- __objtool_obj := $(objtree)/tools/objtool/objtool
- 
--objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check)
-+objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
- 
- ifndef CONFIG_FRAME_POINTER
- objtool_args += --no-fp
-diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
-index 9eb7c718aaf8..9f05a1002aa9 100644
---- a/arch/x86/include/asm/module.h
-+++ b/arch/x86/include/asm/module.h
-@@ -5,7 +5,7 @@
- #include <asm/orc_types.h>
- 
- struct mod_arch_specific {
--#ifdef CONFIG_ORC_UNWINDER
-+#ifdef CONFIG_UNWINDER_ORC
-       unsigned int num_orcs;
-       int *orc_unwind_ip;
-       struct orc_entry *orc_unwind;
-diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
-index e9f793e2df7a..35d67dc7b69f 100644
---- a/arch/x86/include/asm/unwind.h
-+++ b/arch/x86/include/asm/unwind.h
-@@ -12,11 +12,11 @@ struct unwind_state {
-       struct task_struct *task;
-       int graph_idx;
-       bool error;
--#if defined(CONFIG_ORC_UNWINDER)
-+#if defined(CONFIG_UNWINDER_ORC)
-       bool signal, full_regs;
-       unsigned long sp, bp, ip;
-       struct pt_regs *regs;
--#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
-+#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
-       bool got_irq;
-       unsigned long *bp, *orig_sp, ip;
-       struct pt_regs *regs;
-@@ -50,7 +50,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
-       __unwind_start(state, task, regs, first_frame);
- }
- 
--#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
-+#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
- static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
- {
-       if (unwind_done(state))
-@@ -65,7 +65,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
- }
- #endif
- 
--#ifdef CONFIG_ORC_UNWINDER
-+#ifdef CONFIG_UNWINDER_ORC
- void unwind_init(void);
- void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
-                       void *orc, size_t orc_size);
-diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
-index 9fdb54a95976..e71e42432360 100644
---- a/include/asm-generic/vmlinux.lds.h
-+++ b/include/asm-generic/vmlinux.lds.h
-@@ -686,7 +686,7 @@
- #define BUG_TABLE
- #endif
- 
--#ifdef CONFIG_ORC_UNWINDER
-+#ifdef CONFIG_UNWINDER_ORC
- #define ORC_UNWIND_TABLE                                              \
-       . = ALIGN(4);                                                   \
-       .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) {       \
-diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
-index 3a0b8cb57caf..bf9f03740c30 100644
---- a/arch/x86/Kconfig
-+++ b/arch/x86/Kconfig
-@@ -168,7 +168,7 @@ config X86
-       select HAVE_PERF_REGS
-       select HAVE_PERF_USER_STACK_DUMP
-       select HAVE_REGS_AND_STACK_ACCESS_API
--      select HAVE_RELIABLE_STACKTRACE         if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
-+      select HAVE_RELIABLE_STACKTRACE         if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
-       select HAVE_STACK_VALIDATION            if X86_64
-       select HAVE_SYSCALL_TRACEPOINTS
-       select HAVE_UNSTABLE_SCHED_CLOCK
-diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
-index c441b5d65ec8..5435a943f894 100644
---- a/arch/x86/Kconfig.debug
-+++ b/arch/x86/Kconfig.debug
-@@ -358,13 +358,13 @@ config PUNIT_ATOM_DEBUG
- 
- choice
-       prompt "Choose kernel unwinder"
--      default FRAME_POINTER_UNWINDER
-+      default UNWINDER_FRAME_POINTER
-       ---help---
-         This determines which method will be used for unwinding kernel stack
-         traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
-         livepatch, lockdep, and more.
- 
--config FRAME_POINTER_UNWINDER
-+config UNWINDER_FRAME_POINTER
-       bool "Frame pointer unwinder"
-       select FRAME_POINTER
-       ---help---
-@@ -379,7 +379,7 @@ config FRAME_POINTER_UNWINDER
-         consistency model, as this is currently the only way to get a
-         reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
- 
--config ORC_UNWINDER
-+config UNWINDER_ORC
-       bool "ORC unwinder"
-       depends on X86_64
-       select STACK_VALIDATION
-@@ -396,7 +396,7 @@ config ORC_UNWINDER
-         Enabling this option will increase the kernel's runtime memory usage
-         by roughly 2-4MB, depending on your kernel config.
- 
--config GUESS_UNWINDER
-+config UNWINDER_GUESS
-       bool "Guess unwinder"
-       depends on EXPERT
-       ---help---
-@@ -411,7 +411,7 @@ config GUESS_UNWINDER
- endchoice
- 
- config FRAME_POINTER
--      depends on !ORC_UNWINDER && !GUESS_UNWINDER
-+      depends on !UNWINDER_ORC && !UNWINDER_GUESS
-       bool
- 
- endmenu
-diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
-index 550cd5012b73..66c9e2aab16c 100644
---- a/arch/x86/configs/tiny.config
-+++ b/arch/x86/configs/tiny.config
-@@ -1,5 +1,5 @@
- CONFIG_NOHIGHMEM=y
- # CONFIG_HIGHMEM4G is not set
- # CONFIG_HIGHMEM64G is not set
--CONFIG_GUESS_UNWINDER=y
--# CONFIG_FRAME_POINTER_UNWINDER is not set
-+CONFIG_UNWINDER_GUESS=y
-+# CONFIG_UNWINDER_FRAME_POINTER is not set
-diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
-index eb65c248708d..e32fc1f274d8 100644
---- a/arch/x86/configs/x86_64_defconfig
-+++ b/arch/x86/configs/x86_64_defconfig
-@@ -299,7 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
- # CONFIG_DEBUG_RODATA_TEST is not set
- CONFIG_DEBUG_BOOT_PARAMS=y
- CONFIG_OPTIMIZE_INLINING=y
--CONFIG_ORC_UNWINDER=y
-+CONFIG_UNWINDER_ORC=y
- CONFIG_SECURITY=y
- CONFIG_SECURITY_NETWORK=y
- CONFIG_SECURITY_SELINUX=y
-diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
-index 0b4d1b3880b0..4f6ca5f60f7e 100644
---- a/lib/Kconfig.debug
-+++ b/lib/Kconfig.debug
-@@ -375,7 +375,7 @@ config STACK_VALIDATION
-         that runtime stack traces are more reliable.
- 
-         This is also a prerequisite for generation of ORC unwind data, which
--        is needed for CONFIG_ORC_UNWINDER.
-+        is needed for CONFIG_UNWINDER_ORC.
- 
-         For more information, see
-         tools/objtool/Documentation/stack-validation.txt.
--- 
-2.14.2
-
diff --git a/patches/kernel/0066-x86-unwind-Make-CONFIG_UNWINDER_ORC-y-the-default-in.patch b/patches/kernel/0066-x86-unwind-Make-CONFIG_UNWINDER_ORC-y-the-default-in.patch

deleted file mode 100644 (file)

index 3735815..0000000
--- a/patches/kernel/0066-x86-unwind-Make-CONFIG_UNWINDER_ORC-y-the-default-in.patch
+++ /dev/null
@@ -1,90 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Fri, 13 Oct 2017 15:02:01 -0500
-Subject: [PATCH] x86/unwind: Make CONFIG_UNWINDER_ORC=y the default in kconfig
- for 64-bit
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The ORC unwinder has been stable in testing so far.  Give it much wider
-testing by making it the default in kconfig for x86_64.  It's not yet
-supported for 32-bit, so leave frame pointers as the default there.
-
-Suggested-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/9b1237bbe7244ed9cdf8db2dcb1253e37e1c341e.1507924831.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit fc72ae40e30327aa24eb88a24b9c7058f938bd36)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit aff8d5169f46ae6ac0eb26a5ba745aaf9afa0704)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/Kconfig.debug | 33 +++++++++++++++++----------------
- 1 file changed, 17 insertions(+), 16 deletions(-)
-
-diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
-index 5435a943f894..7d88e9878a75 100644
---- a/arch/x86/Kconfig.debug
-+++ b/arch/x86/Kconfig.debug
-@@ -358,27 +358,13 @@ config PUNIT_ATOM_DEBUG
- 
- choice
-       prompt "Choose kernel unwinder"
--      default UNWINDER_FRAME_POINTER
-+      default UNWINDER_ORC if X86_64
-+      default UNWINDER_FRAME_POINTER if X86_32
-       ---help---
-         This determines which method will be used for unwinding kernel stack
-         traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
-         livepatch, lockdep, and more.
- 
--config UNWINDER_FRAME_POINTER
--      bool "Frame pointer unwinder"
--      select FRAME_POINTER
--      ---help---
--        This option enables the frame pointer unwinder for unwinding kernel
--        stack traces.
--
--        The unwinder itself is fast and it uses less RAM than the ORC
--        unwinder, but the kernel text size will grow by ~3% and the kernel's
--        overall performance will degrade by roughly 5-10%.
--
--        This option is recommended if you want to use the livepatch
--        consistency model, as this is currently the only way to get a
--        reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
--
- config UNWINDER_ORC
-       bool "ORC unwinder"
-       depends on X86_64
-@@ -396,6 +382,21 @@ config UNWINDER_ORC
-         Enabling this option will increase the kernel's runtime memory usage
-         by roughly 2-4MB, depending on your kernel config.
- 
-+config UNWINDER_FRAME_POINTER
-+      bool "Frame pointer unwinder"
-+      select FRAME_POINTER
-+      ---help---
-+        This option enables the frame pointer unwinder for unwinding kernel
-+        stack traces.
-+
-+        The unwinder itself is fast and it uses less RAM than the ORC
-+        unwinder, but the kernel text size will grow by ~3% and the kernel's
-+        overall performance will degrade by roughly 5-10%.
-+
-+        This option is recommended if you want to use the livepatch
-+        consistency model, as this is currently the only way to get a
-+        reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
-+
- config UNWINDER_GUESS
-       bool "Guess unwinder"
-       depends on EXPERT
--- 
-2.14.2
-
diff --git a/patches/kernel/0066-x86-unwind-Rename-unwinder-config-options-to-CONFIG_.patch b/patches/kernel/0066-x86-unwind-Rename-unwinder-config-options-to-CONFIG_.patch

new file mode 100644 (file)

index 0000000..9689db5
--- /dev/null
+++ b/patches/kernel/0066-x86-unwind-Rename-unwinder-config-options-to-CONFIG_.patch
@@ -0,0 +1,273 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Fri, 13 Oct 2017 15:02:00 -0500
+Subject: [PATCH] x86/unwind: Rename unwinder config options to
+ 'CONFIG_UNWINDER_*'
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Rename the unwinder config options from:
+
+  CONFIG_ORC_UNWINDER
+  CONFIG_FRAME_POINTER_UNWINDER
+  CONFIG_GUESS_UNWINDER
+
+to:
+
+  CONFIG_UNWINDER_ORC
+  CONFIG_UNWINDER_FRAME_POINTER
+  CONFIG_UNWINDER_GUESS
+
+... in order to give them a more logical config namespace.
+
+Suggested-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/73972fc7e2762e91912c6b9584582703d6f1b8cc.1507924831.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 11af847446ed0d131cf24d16a7ef3d5ea7a49554)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 27ab2a240a797b073ce63385b1d5db06e44fc3ae)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/x86/orc-unwinder.txt |  2 +-
+ Makefile                           |  4 ++--
+ arch/x86/kernel/Makefile           |  6 +++---
+ scripts/Makefile.build             |  2 +-
+ arch/x86/include/asm/module.h      |  2 +-
+ arch/x86/include/asm/unwind.h      |  8 ++++----
+ include/asm-generic/vmlinux.lds.h  |  2 +-
+ arch/x86/Kconfig                   |  2 +-
+ arch/x86/Kconfig.debug             | 10 +++++-----
+ arch/x86/configs/tiny.config       |  4 ++--
+ arch/x86/configs/x86_64_defconfig  |  2 +-
+ lib/Kconfig.debug                  |  2 +-
+ 12 files changed, 23 insertions(+), 23 deletions(-)
+
+diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt
+index af0c9a4c65a6..cd4b29be29af 100644
+--- a/Documentation/x86/orc-unwinder.txt
++++ b/Documentation/x86/orc-unwinder.txt
+@@ -4,7 +4,7 @@ ORC unwinder
+ Overview
+ --------
+ 
+-The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is
++The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
+ similar in concept to a DWARF unwinder.  The difference is that the
+ format of the ORC data is much simpler than DWARF, which in turn allows
+ the ORC unwinder to be much simpler and faster.
+diff --git a/Makefile b/Makefile
+index 490ce18685ea..b740e3dc9ff8 100644
+--- a/Makefile
++++ b/Makefile
+@@ -965,8 +965,8 @@ ifdef CONFIG_STACK_VALIDATION
+   ifeq ($(has_libelf),1)
+     objtool_target := tools/objtool FORCE
+   else
+-    ifdef CONFIG_ORC_UNWINDER
+-      $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
++    ifdef CONFIG_UNWINDER_ORC
++      $(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+     else
+       $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+     endif
+diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
+index e2315aecc441..5bf0d5a473b4 100644
+--- a/arch/x86/kernel/Makefile
++++ b/arch/x86/kernel/Makefile
+@@ -125,9 +125,9 @@ obj-$(CONFIG_PERF_EVENTS)          += perf_regs.o
+ obj-$(CONFIG_TRACING)                 += tracepoint.o
+ obj-$(CONFIG_SCHED_MC_PRIO)           += itmt.o
+ 
+-obj-$(CONFIG_ORC_UNWINDER)            += unwind_orc.o
+-obj-$(CONFIG_FRAME_POINTER_UNWINDER)  += unwind_frame.o
+-obj-$(CONFIG_GUESS_UNWINDER)          += unwind_guess.o
++obj-$(CONFIG_UNWINDER_ORC)            += unwind_orc.o
++obj-$(CONFIG_UNWINDER_FRAME_POINTER)  += unwind_frame.o
++obj-$(CONFIG_UNWINDER_GUESS)          += unwind_guess.o
+ 
+ ###
+ # 64 bit specific files
+diff --git a/scripts/Makefile.build b/scripts/Makefile.build
+index ab2c8ef43cdb..436005392047 100644
+--- a/scripts/Makefile.build
++++ b/scripts/Makefile.build
+@@ -258,7 +258,7 @@ ifneq ($(SKIP_STACK_VALIDATION),1)
+ 
+ __objtool_obj := $(objtree)/tools/objtool/objtool
+ 
+-objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check)
++objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
+ 
+ ifndef CONFIG_FRAME_POINTER
+ objtool_args += --no-fp
+diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
+index 9eb7c718aaf8..9f05a1002aa9 100644
+--- a/arch/x86/include/asm/module.h
++++ b/arch/x86/include/asm/module.h
+@@ -5,7 +5,7 @@
+ #include <asm/orc_types.h>
+ 
+ struct mod_arch_specific {
+-#ifdef CONFIG_ORC_UNWINDER
++#ifdef CONFIG_UNWINDER_ORC
+       unsigned int num_orcs;
+       int *orc_unwind_ip;
+       struct orc_entry *orc_unwind;
+diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
+index e9f793e2df7a..35d67dc7b69f 100644
+--- a/arch/x86/include/asm/unwind.h
++++ b/arch/x86/include/asm/unwind.h
+@@ -12,11 +12,11 @@ struct unwind_state {
+       struct task_struct *task;
+       int graph_idx;
+       bool error;
+-#if defined(CONFIG_ORC_UNWINDER)
++#if defined(CONFIG_UNWINDER_ORC)
+       bool signal, full_regs;
+       unsigned long sp, bp, ip;
+       struct pt_regs *regs;
+-#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
++#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
+       bool got_irq;
+       unsigned long *bp, *orig_sp, ip;
+       struct pt_regs *regs;
+@@ -50,7 +50,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
+       __unwind_start(state, task, regs, first_frame);
+ }
+ 
+-#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
++#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
+ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+ {
+       if (unwind_done(state))
+@@ -65,7 +65,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+ }
+ #endif
+ 
+-#ifdef CONFIG_ORC_UNWINDER
++#ifdef CONFIG_UNWINDER_ORC
+ void unwind_init(void);
+ void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
+                       void *orc, size_t orc_size);
+diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
+index 9fdb54a95976..e71e42432360 100644
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -686,7 +686,7 @@
+ #define BUG_TABLE
+ #endif
+ 
+-#ifdef CONFIG_ORC_UNWINDER
++#ifdef CONFIG_UNWINDER_ORC
+ #define ORC_UNWIND_TABLE                                              \
+       . = ALIGN(4);                                                   \
+       .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) {       \
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 3a0b8cb57caf..bf9f03740c30 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -168,7 +168,7 @@ config X86
+       select HAVE_PERF_REGS
+       select HAVE_PERF_USER_STACK_DUMP
+       select HAVE_REGS_AND_STACK_ACCESS_API
+-      select HAVE_RELIABLE_STACKTRACE         if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
++      select HAVE_RELIABLE_STACKTRACE         if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
+       select HAVE_STACK_VALIDATION            if X86_64
+       select HAVE_SYSCALL_TRACEPOINTS
+       select HAVE_UNSTABLE_SCHED_CLOCK
+diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
+index c441b5d65ec8..5435a943f894 100644
+--- a/arch/x86/Kconfig.debug
++++ b/arch/x86/Kconfig.debug
+@@ -358,13 +358,13 @@ config PUNIT_ATOM_DEBUG
+ 
+ choice
+       prompt "Choose kernel unwinder"
+-      default FRAME_POINTER_UNWINDER
++      default UNWINDER_FRAME_POINTER
+       ---help---
+         This determines which method will be used for unwinding kernel stack
+         traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
+         livepatch, lockdep, and more.
+ 
+-config FRAME_POINTER_UNWINDER
++config UNWINDER_FRAME_POINTER
+       bool "Frame pointer unwinder"
+       select FRAME_POINTER
+       ---help---
+@@ -379,7 +379,7 @@ config FRAME_POINTER_UNWINDER
+         consistency model, as this is currently the only way to get a
+         reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
+ 
+-config ORC_UNWINDER
++config UNWINDER_ORC
+       bool "ORC unwinder"
+       depends on X86_64
+       select STACK_VALIDATION
+@@ -396,7 +396,7 @@ config ORC_UNWINDER
+         Enabling this option will increase the kernel's runtime memory usage
+         by roughly 2-4MB, depending on your kernel config.
+ 
+-config GUESS_UNWINDER
++config UNWINDER_GUESS
+       bool "Guess unwinder"
+       depends on EXPERT
+       ---help---
+@@ -411,7 +411,7 @@ config GUESS_UNWINDER
+ endchoice
+ 
+ config FRAME_POINTER
+-      depends on !ORC_UNWINDER && !GUESS_UNWINDER
++      depends on !UNWINDER_ORC && !UNWINDER_GUESS
+       bool
+ 
+ endmenu
+diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
+index 550cd5012b73..66c9e2aab16c 100644
+--- a/arch/x86/configs/tiny.config
++++ b/arch/x86/configs/tiny.config
+@@ -1,5 +1,5 @@
+ CONFIG_NOHIGHMEM=y
+ # CONFIG_HIGHMEM4G is not set
+ # CONFIG_HIGHMEM64G is not set
+-CONFIG_GUESS_UNWINDER=y
+-# CONFIG_FRAME_POINTER_UNWINDER is not set
++CONFIG_UNWINDER_GUESS=y
++# CONFIG_UNWINDER_FRAME_POINTER is not set
+diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
+index eb65c248708d..e32fc1f274d8 100644
+--- a/arch/x86/configs/x86_64_defconfig
++++ b/arch/x86/configs/x86_64_defconfig
+@@ -299,7 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
+ # CONFIG_DEBUG_RODATA_TEST is not set
+ CONFIG_DEBUG_BOOT_PARAMS=y
+ CONFIG_OPTIMIZE_INLINING=y
+-CONFIG_ORC_UNWINDER=y
++CONFIG_UNWINDER_ORC=y
+ CONFIG_SECURITY=y
+ CONFIG_SECURITY_NETWORK=y
+ CONFIG_SECURITY_SELINUX=y
+diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
+index 0b4d1b3880b0..4f6ca5f60f7e 100644
+--- a/lib/Kconfig.debug
++++ b/lib/Kconfig.debug
+@@ -375,7 +375,7 @@ config STACK_VALIDATION
+         that runtime stack traces are more reliable.
+ 
+         This is also a prerequisite for generation of ORC unwind data, which
+-        is needed for CONFIG_ORC_UNWINDER.
++        is needed for CONFIG_UNWINDER_ORC.
+ 
+         For more information, see
+         tools/objtool/Documentation/stack-validation.txt.
+-- 
+2.14.2
+
diff --git a/patches/kernel/0067-bitops-Add-clear-set_bit32-to-linux-bitops.h.patch b/patches/kernel/0067-bitops-Add-clear-set_bit32-to-linux-bitops.h.patch

deleted file mode 100644 (file)

index 302f3fb..0000000
--- a/patches/kernel/0067-bitops-Add-clear-set_bit32-to-linux-bitops.h.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andi Kleen <ak@linux.intel.com>
-Date: Fri, 13 Oct 2017 14:56:41 -0700
-Subject: [PATCH] bitops: Add clear/set_bit32() to linux/bitops.h
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Add two simple wrappers around set_bit/clear_bit() that accept
-the common case of an u32 array. This avoids writing
-casts in all callers.
-
-Signed-off-by: Andi Kleen <ak@linux.intel.com>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Link: http://lkml.kernel.org/r/20171013215645.23166-2-andi@firstfloor.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit cbe96375025e14fc76f9ed42ee5225120d7210f8)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 06d31c11519ca0e8f9b7cab857f442ef44dfc1b2)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- include/linux/bitops.h | 26 ++++++++++++++++++++++++++
- 1 file changed, 26 insertions(+)
-
-diff --git a/include/linux/bitops.h b/include/linux/bitops.h
-index a83c822c35c2..eb257a96db6d 100644
---- a/include/linux/bitops.h
-+++ b/include/linux/bitops.h
-@@ -226,6 +226,32 @@ static inline unsigned long __ffs64(u64 word)
-       return __ffs((unsigned long)word);
- }
- 
-+/*
-+ * clear_bit32 - Clear a bit in memory for u32 array
-+ * @nr: Bit to clear
-+ * @addr: u32 * address of bitmap
-+ *
-+ * Same as clear_bit, but avoids needing casts for u32 arrays.
-+ */
-+
-+static __always_inline void clear_bit32(long nr, volatile u32 *addr)
-+{
-+      clear_bit(nr, (volatile unsigned long *)addr);
-+}
-+
-+/*
-+ * set_bit32 - Set a bit in memory for u32 array
-+ * @nr: Bit to clear
-+ * @addr: u32 * address of bitmap
-+ *
-+ * Same as set_bit, but avoids needing casts for u32 arrays.
-+ */
-+
-+static __always_inline void set_bit32(long nr, volatile u32 *addr)
-+{
-+      set_bit(nr, (volatile unsigned long *)addr);
-+}
-+
- #ifdef __KERNEL__
- 
- #ifndef set_mask_bits
--- 
-2.14.2
-
diff --git a/patches/kernel/0067-x86-unwind-Make-CONFIG_UNWINDER_ORC-y-the-default-in.patch b/patches/kernel/0067-x86-unwind-Make-CONFIG_UNWINDER_ORC-y-the-default-in.patch

new file mode 100644 (file)

index 0000000..3735815
--- /dev/null
+++ b/patches/kernel/0067-x86-unwind-Make-CONFIG_UNWINDER_ORC-y-the-default-in.patch
@@ -0,0 +1,90 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Fri, 13 Oct 2017 15:02:01 -0500
+Subject: [PATCH] x86/unwind: Make CONFIG_UNWINDER_ORC=y the default in kconfig
+ for 64-bit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The ORC unwinder has been stable in testing so far.  Give it much wider
+testing by making it the default in kconfig for x86_64.  It's not yet
+supported for 32-bit, so leave frame pointers as the default there.
+
+Suggested-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/9b1237bbe7244ed9cdf8db2dcb1253e37e1c341e.1507924831.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit fc72ae40e30327aa24eb88a24b9c7058f938bd36)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit aff8d5169f46ae6ac0eb26a5ba745aaf9afa0704)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/Kconfig.debug | 33 +++++++++++++++++----------------
+ 1 file changed, 17 insertions(+), 16 deletions(-)
+
+diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
+index 5435a943f894..7d88e9878a75 100644
+--- a/arch/x86/Kconfig.debug
++++ b/arch/x86/Kconfig.debug
+@@ -358,27 +358,13 @@ config PUNIT_ATOM_DEBUG
+ 
+ choice
+       prompt "Choose kernel unwinder"
+-      default UNWINDER_FRAME_POINTER
++      default UNWINDER_ORC if X86_64
++      default UNWINDER_FRAME_POINTER if X86_32
+       ---help---
+         This determines which method will be used for unwinding kernel stack
+         traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
+         livepatch, lockdep, and more.
+ 
+-config UNWINDER_FRAME_POINTER
+-      bool "Frame pointer unwinder"
+-      select FRAME_POINTER
+-      ---help---
+-        This option enables the frame pointer unwinder for unwinding kernel
+-        stack traces.
+-
+-        The unwinder itself is fast and it uses less RAM than the ORC
+-        unwinder, but the kernel text size will grow by ~3% and the kernel's
+-        overall performance will degrade by roughly 5-10%.
+-
+-        This option is recommended if you want to use the livepatch
+-        consistency model, as this is currently the only way to get a
+-        reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
+-
+ config UNWINDER_ORC
+       bool "ORC unwinder"
+       depends on X86_64
+@@ -396,6 +382,21 @@ config UNWINDER_ORC
+         Enabling this option will increase the kernel's runtime memory usage
+         by roughly 2-4MB, depending on your kernel config.
+ 
++config UNWINDER_FRAME_POINTER
++      bool "Frame pointer unwinder"
++      select FRAME_POINTER
++      ---help---
++        This option enables the frame pointer unwinder for unwinding kernel
++        stack traces.
++
++        The unwinder itself is fast and it uses less RAM than the ORC
++        unwinder, but the kernel text size will grow by ~3% and the kernel's
++        overall performance will degrade by roughly 5-10%.
++
++        This option is recommended if you want to use the livepatch
++        consistency model, as this is currently the only way to get a
++        reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
++
+ config UNWINDER_GUESS
+       bool "Guess unwinder"
+       depends on EXPERT
+-- 
+2.14.2
+
diff --git a/patches/kernel/0068-bitops-Add-clear-set_bit32-to-linux-bitops.h.patch b/patches/kernel/0068-bitops-Add-clear-set_bit32-to-linux-bitops.h.patch

new file mode 100644 (file)

index 0000000..302f3fb
--- /dev/null
+++ b/patches/kernel/0068-bitops-Add-clear-set_bit32-to-linux-bitops.h.patch
@@ -0,0 +1,69 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andi Kleen <ak@linux.intel.com>
+Date: Fri, 13 Oct 2017 14:56:41 -0700
+Subject: [PATCH] bitops: Add clear/set_bit32() to linux/bitops.h
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Add two simple wrappers around set_bit/clear_bit() that accept
+the common case of an u32 array. This avoids writing
+casts in all callers.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/20171013215645.23166-2-andi@firstfloor.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit cbe96375025e14fc76f9ed42ee5225120d7210f8)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 06d31c11519ca0e8f9b7cab857f442ef44dfc1b2)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ include/linux/bitops.h | 26 ++++++++++++++++++++++++++
+ 1 file changed, 26 insertions(+)
+
+diff --git a/include/linux/bitops.h b/include/linux/bitops.h
+index a83c822c35c2..eb257a96db6d 100644
+--- a/include/linux/bitops.h
++++ b/include/linux/bitops.h
+@@ -226,6 +226,32 @@ static inline unsigned long __ffs64(u64 word)
+       return __ffs((unsigned long)word);
+ }
+ 
++/*
++ * clear_bit32 - Clear a bit in memory for u32 array
++ * @nr: Bit to clear
++ * @addr: u32 * address of bitmap
++ *
++ * Same as clear_bit, but avoids needing casts for u32 arrays.
++ */
++
++static __always_inline void clear_bit32(long nr, volatile u32 *addr)
++{
++      clear_bit(nr, (volatile unsigned long *)addr);
++}
++
++/*
++ * set_bit32 - Set a bit in memory for u32 array
++ * @nr: Bit to set
++ * @addr: u32 * address of bitmap
++ *
++ * Same as set_bit, but avoids needing casts for u32 arrays.
++ */
++
++static __always_inline void set_bit32(long nr, volatile u32 *addr)
++{
++      set_bit(nr, (volatile unsigned long *)addr);
++}
++
+ #ifdef __KERNEL__
+ 
+ #ifndef set_mask_bits
+-- 
+2.14.2
+
diff --git a/patches/kernel/0068-x86-cpuid-Add-generic-table-for-CPUID-dependencies.patch b/patches/kernel/0068-x86-cpuid-Add-generic-table-for-CPUID-dependencies.patch

deleted file mode 100644 (file)

index 4c4b7ba..0000000
--- a/patches/kernel/0068-x86-cpuid-Add-generic-table-for-CPUID-dependencies.patch
+++ /dev/null
@@ -1,221 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andi Kleen <ak@linux.intel.com>
-Date: Fri, 13 Oct 2017 14:56:42 -0700
-Subject: [PATCH] x86/cpuid: Add generic table for CPUID dependencies
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Some CPUID features depend on other features. Currently it's
-possible to to clear dependent features, but not clear the base features,
-which can cause various interesting problems.
-
-This patch implements a generic table to describe dependencies
-between CPUID features, to be used by all code that clears
-CPUID.
-
-Some subsystems (like XSAVE) had an own implementation of this,
-but it's better to do it all in a single place for everyone.
-
-Then clear_cpu_cap and setup_clear_cpu_cap always look up
-this table and clear all dependencies too.
-
-This is intended to be a practical table: only for features
-that make sense to clear. If someone for example clears FPU,
-or other features that are essentially part of the required
-base feature set, not much is going to work. Handling
-that is right now out of scope. We're only handling
-features which can be usefully cleared.
-
-Signed-off-by: Andi Kleen <ak@linux.intel.com>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Jonathan McDowell <noodles@earth.li>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Link: http://lkml.kernel.org/r/20171013215645.23166-3-andi@firstfloor.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 0b00de857a648dafe7020878c7a27cf776f5edf4)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 35672522f2fc9a2e116ed1766f190bc08ef5582a)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/cpu/Makefile       |   1 +
- arch/x86/include/asm/cpufeature.h  |   9 ++-
- arch/x86/include/asm/cpufeatures.h |   5 ++
- arch/x86/kernel/cpu/cpuid-deps.c   | 113 +++++++++++++++++++++++++++++++++++++
- 4 files changed, 123 insertions(+), 5 deletions(-)
- create mode 100644 arch/x86/kernel/cpu/cpuid-deps.c
-
-diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
-index e17942c131c8..de260fae1017 100644
---- a/arch/x86/kernel/cpu/Makefile
-+++ b/arch/x86/kernel/cpu/Makefile
-@@ -22,6 +22,7 @@ obj-y                        += rdrand.o
- obj-y                 += match.o
- obj-y                 += bugs.o
- obj-$(CONFIG_CPU_FREQ)        += aperfmperf.o
-+obj-y                 += cpuid-deps.o
- 
- obj-$(CONFIG_PROC_FS) += proc.o
- obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
-diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
-index d59c15c3defd..225fd8374fae 100644
---- a/arch/x86/include/asm/cpufeature.h
-+++ b/arch/x86/include/asm/cpufeature.h
-@@ -125,11 +125,10 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
- #define boot_cpu_has(bit)     cpu_has(&boot_cpu_data, bit)
- 
- #define set_cpu_cap(c, bit)   set_bit(bit, (unsigned long *)((c)->x86_capability))
--#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
--#define setup_clear_cpu_cap(bit) do { \
--      clear_cpu_cap(&boot_cpu_data, bit);     \
--      set_bit(bit, (unsigned long *)cpu_caps_cleared); \
--} while (0)
-+
-+extern void setup_clear_cpu_cap(unsigned int bit);
-+extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
-+
- #define setup_force_cpu_cap(bit) do { \
-       set_cpu_cap(&boot_cpu_data, bit);       \
-       set_bit(bit, (unsigned long *)cpu_caps_set);    \
-diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
-index 5a28e8e55e36..f4e145c4b06f 100644
---- a/arch/x86/include/asm/cpufeatures.h
-+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -21,6 +21,11 @@
-  * this feature bit is not displayed in /proc/cpuinfo at all.
-  */
- 
-+/*
-+ * When adding new features here that depend on other features,
-+ * please update the table in kernel/cpu/cpuid-deps.c
-+ */
-+
- /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
- #define X86_FEATURE_FPU               ( 0*32+ 0) /* Onboard FPU */
- #define X86_FEATURE_VME               ( 0*32+ 1) /* Virtual Mode Extensions */
-diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
-new file mode 100644
-index 000000000000..e48eb7313120
---- /dev/null
-+++ b/arch/x86/kernel/cpu/cpuid-deps.c
-@@ -0,0 +1,113 @@
-+/* Declare dependencies between CPUIDs */
-+#include <linux/kernel.h>
-+#include <linux/init.h>
-+#include <linux/module.h>
-+#include <asm/cpufeature.h>
-+
-+struct cpuid_dep {
-+      unsigned int    feature;
-+      unsigned int    depends;
-+};
-+
-+/*
-+ * Table of CPUID features that depend on others.
-+ *
-+ * This only includes dependencies that can be usefully disabled, not
-+ * features part of the base set (like FPU).
-+ *
-+ * Note this all is not __init / __initdata because it can be
-+ * called from cpu hotplug. It shouldn't do anything in this case,
-+ * but it's difficult to tell that to the init reference checker.
-+ */
-+const static struct cpuid_dep cpuid_deps[] = {
-+      { X86_FEATURE_XSAVEOPT,         X86_FEATURE_XSAVE     },
-+      { X86_FEATURE_XSAVEC,           X86_FEATURE_XSAVE     },
-+      { X86_FEATURE_XSAVES,           X86_FEATURE_XSAVE     },
-+      { X86_FEATURE_AVX,              X86_FEATURE_XSAVE     },
-+      { X86_FEATURE_PKU,              X86_FEATURE_XSAVE     },
-+      { X86_FEATURE_MPX,              X86_FEATURE_XSAVE     },
-+      { X86_FEATURE_XGETBV1,          X86_FEATURE_XSAVE     },
-+      { X86_FEATURE_FXSR_OPT,         X86_FEATURE_FXSR      },
-+      { X86_FEATURE_XMM,              X86_FEATURE_FXSR      },
-+      { X86_FEATURE_XMM2,             X86_FEATURE_XMM       },
-+      { X86_FEATURE_XMM3,             X86_FEATURE_XMM2      },
-+      { X86_FEATURE_XMM4_1,           X86_FEATURE_XMM2      },
-+      { X86_FEATURE_XMM4_2,           X86_FEATURE_XMM2      },
-+      { X86_FEATURE_XMM3,             X86_FEATURE_XMM2      },
-+      { X86_FEATURE_PCLMULQDQ,        X86_FEATURE_XMM2      },
-+      { X86_FEATURE_SSSE3,            X86_FEATURE_XMM2,     },
-+      { X86_FEATURE_F16C,             X86_FEATURE_XMM2,     },
-+      { X86_FEATURE_AES,              X86_FEATURE_XMM2      },
-+      { X86_FEATURE_SHA_NI,           X86_FEATURE_XMM2      },
-+      { X86_FEATURE_FMA,              X86_FEATURE_AVX       },
-+      { X86_FEATURE_AVX2,             X86_FEATURE_AVX,      },
-+      { X86_FEATURE_AVX512F,          X86_FEATURE_AVX,      },
-+      { X86_FEATURE_AVX512IFMA,       X86_FEATURE_AVX512F   },
-+      { X86_FEATURE_AVX512PF,         X86_FEATURE_AVX512F   },
-+      { X86_FEATURE_AVX512ER,         X86_FEATURE_AVX512F   },
-+      { X86_FEATURE_AVX512CD,         X86_FEATURE_AVX512F   },
-+      { X86_FEATURE_AVX512DQ,         X86_FEATURE_AVX512F   },
-+      { X86_FEATURE_AVX512BW,         X86_FEATURE_AVX512F   },
-+      { X86_FEATURE_AVX512VL,         X86_FEATURE_AVX512F   },
-+      { X86_FEATURE_AVX512VBMI,       X86_FEATURE_AVX512F   },
-+      { X86_FEATURE_AVX512_4VNNIW,    X86_FEATURE_AVX512F   },
-+      { X86_FEATURE_AVX512_4FMAPS,    X86_FEATURE_AVX512F   },
-+      { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F   },
-+      {}
-+};
-+
-+static inline void __clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit)
-+{
-+      clear_bit32(bit, c->x86_capability);
-+}
-+
-+static inline void __setup_clear_cpu_cap(unsigned int bit)
-+{
-+      clear_cpu_cap(&boot_cpu_data, bit);
-+      set_bit32(bit, cpu_caps_cleared);
-+}
-+
-+static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
-+{
-+      if (!c)
-+              __setup_clear_cpu_cap(feature);
-+      else
-+              __clear_cpu_cap(c, feature);
-+}
-+
-+static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
-+{
-+      bool changed;
-+      DECLARE_BITMAP(disable, NCAPINTS * sizeof(u32) * 8);
-+      const struct cpuid_dep *d;
-+
-+      clear_feature(c, feature);
-+
-+      /* Collect all features to disable, handling dependencies */
-+      memset(disable, 0, sizeof(disable));
-+      __set_bit(feature, disable);
-+
-+      /* Loop until we get a stable state. */
-+      do {
-+              changed = false;
-+              for (d = cpuid_deps; d->feature; d++) {
-+                      if (!test_bit(d->depends, disable))
-+                              continue;
-+                      if (__test_and_set_bit(d->feature, disable))
-+                              continue;
-+
-+                      changed = true;
-+                      clear_feature(c, d->feature);
-+              }
-+      } while (changed);
-+}
-+
-+void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
-+{
-+      do_clear_cpu_cap(c, feature);
-+}
-+
-+void setup_clear_cpu_cap(unsigned int feature)
-+{
-+      do_clear_cpu_cap(NULL, feature);
-+}
--- 
-2.14.2
-
diff --git a/patches/kernel/0069-x86-cpuid-Add-generic-table-for-CPUID-dependencies.patch b/patches/kernel/0069-x86-cpuid-Add-generic-table-for-CPUID-dependencies.patch

new file mode 100644 (file)

index 0000000..4c4b7ba
--- /dev/null
+++ b/patches/kernel/0069-x86-cpuid-Add-generic-table-for-CPUID-dependencies.patch
@@ -0,0 +1,221 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andi Kleen <ak@linux.intel.com>
+Date: Fri, 13 Oct 2017 14:56:42 -0700
+Subject: [PATCH] x86/cpuid: Add generic table for CPUID dependencies
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Some CPUID features depend on other features. Currently it's
+possible to clear dependent features, but not clear the base features,
+which can cause various interesting problems.
+
+This patch implements a generic table to describe dependencies
+between CPUID features, to be used by all code that clears
+CPUID.
+
+Some subsystems (like XSAVE) had their own implementation of this,
+but it's better to do it all in a single place for everyone.
+
+Then clear_cpu_cap and setup_clear_cpu_cap always look up
+this table and clear all dependencies too.
+
+This is intended to be a practical table: only for features
+that make sense to clear. If someone for example clears FPU,
+or other features that are essentially part of the required
+base feature set, not much is going to work. Handling
+that is right now out of scope. We're only handling
+features which can be usefully cleared.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Jonathan McDowell <noodles@earth.li>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/20171013215645.23166-3-andi@firstfloor.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 0b00de857a648dafe7020878c7a27cf776f5edf4)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 35672522f2fc9a2e116ed1766f190bc08ef5582a)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/cpu/Makefile       |   1 +
+ arch/x86/include/asm/cpufeature.h  |   9 ++-
+ arch/x86/include/asm/cpufeatures.h |   5 ++
+ arch/x86/kernel/cpu/cpuid-deps.c   | 113 +++++++++++++++++++++++++++++++++++++
+ 4 files changed, 123 insertions(+), 5 deletions(-)
+ create mode 100644 arch/x86/kernel/cpu/cpuid-deps.c
+
+diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
+index e17942c131c8..de260fae1017 100644
+--- a/arch/x86/kernel/cpu/Makefile
++++ b/arch/x86/kernel/cpu/Makefile
+@@ -22,6 +22,7 @@ obj-y                        += rdrand.o
+ obj-y                 += match.o
+ obj-y                 += bugs.o
+ obj-$(CONFIG_CPU_FREQ)        += aperfmperf.o
++obj-y                 += cpuid-deps.o
+ 
+ obj-$(CONFIG_PROC_FS) += proc.o
+ obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
+diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
+index d59c15c3defd..225fd8374fae 100644
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -125,11 +125,10 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
+ #define boot_cpu_has(bit)     cpu_has(&boot_cpu_data, bit)
+ 
+ #define set_cpu_cap(c, bit)   set_bit(bit, (unsigned long *)((c)->x86_capability))
+-#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
+-#define setup_clear_cpu_cap(bit) do { \
+-      clear_cpu_cap(&boot_cpu_data, bit);     \
+-      set_bit(bit, (unsigned long *)cpu_caps_cleared); \
+-} while (0)
++
++extern void setup_clear_cpu_cap(unsigned int bit);
++extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
++
+ #define setup_force_cpu_cap(bit) do { \
+       set_cpu_cap(&boot_cpu_data, bit);       \
+       set_bit(bit, (unsigned long *)cpu_caps_set);    \
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 5a28e8e55e36..f4e145c4b06f 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -21,6 +21,11 @@
+  * this feature bit is not displayed in /proc/cpuinfo at all.
+  */
+ 
++/*
++ * When adding new features here that depend on other features,
++ * please update the table in kernel/cpu/cpuid-deps.c
++ */
++
+ /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+ #define X86_FEATURE_FPU               ( 0*32+ 0) /* Onboard FPU */
+ #define X86_FEATURE_VME               ( 0*32+ 1) /* Virtual Mode Extensions */
+diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
+new file mode 100644
+index 000000000000..e48eb7313120
+--- /dev/null
++++ b/arch/x86/kernel/cpu/cpuid-deps.c
+@@ -0,0 +1,113 @@
++/* Declare dependencies between CPUIDs */
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <asm/cpufeature.h>
++
++struct cpuid_dep {
++      unsigned int    feature;
++      unsigned int    depends;
++};
++
++/*
++ * Table of CPUID features that depend on others.
++ *
++ * This only includes dependencies that can be usefully disabled, not
++ * features part of the base set (like FPU).
++ *
++ * Note this all is not __init / __initdata because it can be
++ * called from cpu hotplug. It shouldn't do anything in this case,
++ * but it's difficult to tell that to the init reference checker.
++ */
++const static struct cpuid_dep cpuid_deps[] = {
++      { X86_FEATURE_XSAVEOPT,         X86_FEATURE_XSAVE     },
++      { X86_FEATURE_XSAVEC,           X86_FEATURE_XSAVE     },
++      { X86_FEATURE_XSAVES,           X86_FEATURE_XSAVE     },
++      { X86_FEATURE_AVX,              X86_FEATURE_XSAVE     },
++      { X86_FEATURE_PKU,              X86_FEATURE_XSAVE     },
++      { X86_FEATURE_MPX,              X86_FEATURE_XSAVE     },
++      { X86_FEATURE_XGETBV1,          X86_FEATURE_XSAVE     },
++      { X86_FEATURE_FXSR_OPT,         X86_FEATURE_FXSR      },
++      { X86_FEATURE_XMM,              X86_FEATURE_FXSR      },
++      { X86_FEATURE_XMM2,             X86_FEATURE_XMM       },
++      { X86_FEATURE_XMM3,             X86_FEATURE_XMM2      },
++      { X86_FEATURE_XMM4_1,           X86_FEATURE_XMM2      },
++      { X86_FEATURE_XMM4_2,           X86_FEATURE_XMM2      },
++      { X86_FEATURE_XMM3,             X86_FEATURE_XMM2      },
++      { X86_FEATURE_PCLMULQDQ,        X86_FEATURE_XMM2      },
++      { X86_FEATURE_SSSE3,            X86_FEATURE_XMM2,     },
++      { X86_FEATURE_F16C,             X86_FEATURE_XMM2,     },
++      { X86_FEATURE_AES,              X86_FEATURE_XMM2      },
++      { X86_FEATURE_SHA_NI,           X86_FEATURE_XMM2      },
++      { X86_FEATURE_FMA,              X86_FEATURE_AVX       },
++      { X86_FEATURE_AVX2,             X86_FEATURE_AVX,      },
++      { X86_FEATURE_AVX512F,          X86_FEATURE_AVX,      },
++      { X86_FEATURE_AVX512IFMA,       X86_FEATURE_AVX512F   },
++      { X86_FEATURE_AVX512PF,         X86_FEATURE_AVX512F   },
++      { X86_FEATURE_AVX512ER,         X86_FEATURE_AVX512F   },
++      { X86_FEATURE_AVX512CD,         X86_FEATURE_AVX512F   },
++      { X86_FEATURE_AVX512DQ,         X86_FEATURE_AVX512F   },
++      { X86_FEATURE_AVX512BW,         X86_FEATURE_AVX512F   },
++      { X86_FEATURE_AVX512VL,         X86_FEATURE_AVX512F   },
++      { X86_FEATURE_AVX512VBMI,       X86_FEATURE_AVX512F   },
++      { X86_FEATURE_AVX512_4VNNIW,    X86_FEATURE_AVX512F   },
++      { X86_FEATURE_AVX512_4FMAPS,    X86_FEATURE_AVX512F   },
++      { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F   },
++      {}
++};
++
++static inline void __clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit)
++{
++      clear_bit32(bit, c->x86_capability);
++}
++
++static inline void __setup_clear_cpu_cap(unsigned int bit)
++{
++      clear_cpu_cap(&boot_cpu_data, bit);
++      set_bit32(bit, cpu_caps_cleared);
++}
++
++static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
++{
++      if (!c)
++              __setup_clear_cpu_cap(feature);
++      else
++              __clear_cpu_cap(c, feature);
++}
++
++static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
++{
++      bool changed;
++      DECLARE_BITMAP(disable, NCAPINTS * sizeof(u32) * 8);
++      const struct cpuid_dep *d;
++
++      clear_feature(c, feature);
++
++      /* Collect all features to disable, handling dependencies */
++      memset(disable, 0, sizeof(disable));
++      __set_bit(feature, disable);
++
++      /* Loop until we get a stable state. */
++      do {
++              changed = false;
++              for (d = cpuid_deps; d->feature; d++) {
++                      if (!test_bit(d->depends, disable))
++                              continue;
++                      if (__test_and_set_bit(d->feature, disable))
++                              continue;
++
++                      changed = true;
++                      clear_feature(c, d->feature);
++              }
++      } while (changed);
++}
++
++void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
++{
++      do_clear_cpu_cap(c, feature);
++}
++
++void setup_clear_cpu_cap(unsigned int feature)
++{
++      do_clear_cpu_cap(NULL, feature);
++}
+-- 
+2.14.2
+
diff --git a/patches/kernel/0069-x86-fpu-Parse-clearcpuid-as-early-XSAVE-argument.patch b/patches/kernel/0069-x86-fpu-Parse-clearcpuid-as-early-XSAVE-argument.patch

deleted file mode 100644 (file)

index b4c6d58..0000000
--- a/patches/kernel/0069-x86-fpu-Parse-clearcpuid-as-early-XSAVE-argument.patch
+++ /dev/null
@@ -1,97 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andi Kleen <ak@linux.intel.com>
-Date: Fri, 13 Oct 2017 14:56:43 -0700
-Subject: [PATCH] x86/fpu: Parse clearcpuid= as early XSAVE argument
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-With a followon patch we want to make clearcpuid affect the XSAVE
-configuration. But xsave is currently initialized before arguments
-are parsed. Move the clearcpuid= parsing into the special
-early xsave argument parsing code.
-
-Since clearcpuid= contains a = we need to keep the old __setup
-around as a dummy, otherwise it would end up as a environment
-variable in init's environment.
-
-Signed-off-by: Andi Kleen <ak@linux.intel.com>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Link: http://lkml.kernel.org/r/20171013215645.23166-4-andi@firstfloor.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 0c2a3913d6f50503f7c59d83a6219e39508cc898)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 27deb452eb0d27c406f3817ab057201aa8767abe)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/cpu/common.c | 16 +++++++---------
- arch/x86/kernel/fpu/init.c   | 11 +++++++++++
- 2 files changed, 18 insertions(+), 9 deletions(-)
-
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 4be7b209a3d6..ef7b1ba56363 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -1293,18 +1293,16 @@ void print_cpu_info(struct cpuinfo_x86 *c)
-               pr_cont(")\n");
- }
- 
--static __init int setup_disablecpuid(char *arg)
-+/*
-+ * clearcpuid= was already parsed in fpu__init_parse_early_param.
-+ * But we need to keep a dummy __setup around otherwise it would
-+ * show up as an environment variable for init.
-+ */
-+static __init int setup_clearcpuid(char *arg)
- {
--      int bit;
--
--      if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
--              setup_clear_cpu_cap(bit);
--      else
--              return 0;
--
-       return 1;
- }
--__setup("clearcpuid=", setup_disablecpuid);
-+__setup("clearcpuid=", setup_clearcpuid);
- 
- #ifdef CONFIG_X86_64
- struct desc_ptr idt_descr __ro_after_init = {
-diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
-index d5d44c452624..07f0ab877f49 100644
---- a/arch/x86/kernel/fpu/init.c
-+++ b/arch/x86/kernel/fpu/init.c
-@@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void)
-  */
- static void __init fpu__init_parse_early_param(void)
- {
-+      char arg[32];
-+      char *argptr = arg;
-+      int bit;
-+
-       if (cmdline_find_option_bool(boot_command_line, "no387"))
-               setup_clear_cpu_cap(X86_FEATURE_FPU);
- 
-@@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void)
- 
-       if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
-               setup_clear_cpu_cap(X86_FEATURE_XSAVES);
-+
-+      if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
-+                              sizeof(arg)) &&
-+          get_option(&argptr, &bit) &&
-+          bit >= 0 &&
-+          bit < NCAPINTS * 32)
-+              setup_clear_cpu_cap(bit);
- }
- 
- /*
--- 
-2.14.2
-
diff --git a/patches/kernel/0070-x86-fpu-Make-XSAVE-check-the-base-CPUID-features-bef.patch b/patches/kernel/0070-x86-fpu-Make-XSAVE-check-the-base-CPUID-features-bef.patch

deleted file mode 100644 (file)

index 0c563c6..0000000
--- a/patches/kernel/0070-x86-fpu-Make-XSAVE-check-the-base-CPUID-features-bef.patch
+++ /dev/null
@@ -1,90 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andi Kleen <ak@linux.intel.com>
-Date: Fri, 13 Oct 2017 14:56:44 -0700
-Subject: [PATCH] x86/fpu: Make XSAVE check the base CPUID features before
- enabling
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Before enabling XSAVE, not only check the XSAVE specific CPUID bits,
-but also the base CPUID features of the respective XSAVE feature.
-This allows to disable individual XSAVE states using the existing
-clearcpuid= option, which can be useful for performance testing
-and debugging, and also in general avoids inconsistencies.
-
-Signed-off-by: Andi Kleen <ak@linux.intel.com>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Link: http://lkml.kernel.org/r/20171013215645.23166-5-andi@firstfloor.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit ccb18db2ab9d923df07e7495123fe5fb02329713)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 2efda26f9ee0eeb9919772e90ca30dbe59008dc8)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/fpu/xstate.c | 23 +++++++++++++++++++++++
- 1 file changed, 23 insertions(+)
-
-diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
-index c24ac1efb12d..3abe85b08234 100644
---- a/arch/x86/kernel/fpu/xstate.c
-+++ b/arch/x86/kernel/fpu/xstate.c
-@@ -15,6 +15,7 @@
- #include <asm/fpu/xstate.h>
- 
- #include <asm/tlbflush.h>
-+#include <asm/cpufeature.h>
- 
- /*
-  * Although we spell it out in here, the Processor Trace
-@@ -36,6 +37,19 @@ static const char *xfeature_names[] =
-       "unknown xstate feature"        ,
- };
- 
-+static short xsave_cpuid_features[] __initdata = {
-+      X86_FEATURE_FPU,
-+      X86_FEATURE_XMM,
-+      X86_FEATURE_AVX,
-+      X86_FEATURE_MPX,
-+      X86_FEATURE_MPX,
-+      X86_FEATURE_AVX512F,
-+      X86_FEATURE_AVX512F,
-+      X86_FEATURE_AVX512F,
-+      X86_FEATURE_INTEL_PT,
-+      X86_FEATURE_PKU,
-+};
-+
- /*
-  * Mask of xstate features supported by the CPU and the kernel:
-  */
-@@ -702,6 +716,7 @@ void __init fpu__init_system_xstate(void)
-       unsigned int eax, ebx, ecx, edx;
-       static int on_boot_cpu __initdata = 1;
-       int err;
-+      int i;
- 
-       WARN_ON_FPU(!on_boot_cpu);
-       on_boot_cpu = 0;
-@@ -735,6 +750,14 @@ void __init fpu__init_system_xstate(void)
-               goto out_disable;
-       }
- 
-+      /*
-+       * Clear XSAVE features that are disabled in the normal CPUID.
-+       */
-+      for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
-+              if (!boot_cpu_has(xsave_cpuid_features[i]))
-+                      xfeatures_mask &= ~BIT(i);
-+      }
-+
-       xfeatures_mask &= fpu__get_supported_xfeatures_mask();
- 
-       /* Enable xstate instructions to be able to continue with initialization: */
--- 
-2.14.2
-
diff --git a/patches/kernel/0070-x86-fpu-Parse-clearcpuid-as-early-XSAVE-argument.patch b/patches/kernel/0070-x86-fpu-Parse-clearcpuid-as-early-XSAVE-argument.patch

new file mode 100644 (file)

index 0000000..b4c6d58
--- /dev/null
+++ b/patches/kernel/0070-x86-fpu-Parse-clearcpuid-as-early-XSAVE-argument.patch
@@ -0,0 +1,97 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andi Kleen <ak@linux.intel.com>
+Date: Fri, 13 Oct 2017 14:56:43 -0700
+Subject: [PATCH] x86/fpu: Parse clearcpuid= as early XSAVE argument
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+With a follow-on patch we want to make clearcpuid affect the XSAVE
+configuration. But xsave is currently initialized before arguments
+are parsed. Move the clearcpuid= parsing into the special
+early xsave argument parsing code.
+
+Since clearcpuid= contains a = we need to keep the old __setup
+around as a dummy, otherwise it would end up as an environment
+variable in init's environment.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/20171013215645.23166-4-andi@firstfloor.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 0c2a3913d6f50503f7c59d83a6219e39508cc898)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 27deb452eb0d27c406f3817ab057201aa8767abe)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/cpu/common.c | 16 +++++++---------
+ arch/x86/kernel/fpu/init.c   | 11 +++++++++++
+ 2 files changed, 18 insertions(+), 9 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 4be7b209a3d6..ef7b1ba56363 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1293,18 +1293,16 @@ void print_cpu_info(struct cpuinfo_x86 *c)
+               pr_cont(")\n");
+ }
+ 
+-static __init int setup_disablecpuid(char *arg)
++/*
++ * clearcpuid= was already parsed in fpu__init_parse_early_param.
++ * But we need to keep a dummy __setup around otherwise it would
++ * show up as an environment variable for init.
++ */
++static __init int setup_clearcpuid(char *arg)
+ {
+-      int bit;
+-
+-      if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
+-              setup_clear_cpu_cap(bit);
+-      else
+-              return 0;
+-
+       return 1;
+ }
+-__setup("clearcpuid=", setup_disablecpuid);
++__setup("clearcpuid=", setup_clearcpuid);
+ 
+ #ifdef CONFIG_X86_64
+ struct desc_ptr idt_descr __ro_after_init = {
+diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
+index d5d44c452624..07f0ab877f49 100644
+--- a/arch/x86/kernel/fpu/init.c
++++ b/arch/x86/kernel/fpu/init.c
+@@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void)
+  */
+ static void __init fpu__init_parse_early_param(void)
+ {
++      char arg[32];
++      char *argptr = arg;
++      int bit;
++
+       if (cmdline_find_option_bool(boot_command_line, "no387"))
+               setup_clear_cpu_cap(X86_FEATURE_FPU);
+ 
+@@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void)
+ 
+       if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
+               setup_clear_cpu_cap(X86_FEATURE_XSAVES);
++
++      if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
++                              sizeof(arg)) &&
++          get_option(&argptr, &bit) &&
++          bit >= 0 &&
++          bit < NCAPINTS * 32)
++              setup_clear_cpu_cap(bit);
+ }
+ 
+ /*
+-- 
+2.14.2
+
diff --git a/patches/kernel/0071-x86-fpu-Make-XSAVE-check-the-base-CPUID-features-bef.patch b/patches/kernel/0071-x86-fpu-Make-XSAVE-check-the-base-CPUID-features-bef.patch

new file mode 100644 (file)

index 0000000..0c563c6
--- /dev/null
+++ b/patches/kernel/0071-x86-fpu-Make-XSAVE-check-the-base-CPUID-features-bef.patch
@@ -0,0 +1,90 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andi Kleen <ak@linux.intel.com>
+Date: Fri, 13 Oct 2017 14:56:44 -0700
+Subject: [PATCH] x86/fpu: Make XSAVE check the base CPUID features before
+ enabling
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Before enabling XSAVE, not only check the XSAVE specific CPUID bits,
+but also the base CPUID features of the respective XSAVE feature.
+This allows disabling individual XSAVE states using the existing
+clearcpuid= option, which can be useful for performance testing
+and debugging, and also in general avoids inconsistencies.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/20171013215645.23166-5-andi@firstfloor.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit ccb18db2ab9d923df07e7495123fe5fb02329713)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 2efda26f9ee0eeb9919772e90ca30dbe59008dc8)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/fpu/xstate.c | 23 +++++++++++++++++++++++
+ 1 file changed, 23 insertions(+)
+
+diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
+index c24ac1efb12d..3abe85b08234 100644
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -15,6 +15,7 @@
+ #include <asm/fpu/xstate.h>
+ 
+ #include <asm/tlbflush.h>
++#include <asm/cpufeature.h>
+ 
+ /*
+  * Although we spell it out in here, the Processor Trace
+@@ -36,6 +37,19 @@ static const char *xfeature_names[] =
+       "unknown xstate feature"        ,
+ };
+ 
++static short xsave_cpuid_features[] __initdata = {
++      X86_FEATURE_FPU,
++      X86_FEATURE_XMM,
++      X86_FEATURE_AVX,
++      X86_FEATURE_MPX,
++      X86_FEATURE_MPX,
++      X86_FEATURE_AVX512F,
++      X86_FEATURE_AVX512F,
++      X86_FEATURE_AVX512F,
++      X86_FEATURE_INTEL_PT,
++      X86_FEATURE_PKU,
++};
++
+ /*
+  * Mask of xstate features supported by the CPU and the kernel:
+  */
+@@ -702,6 +716,7 @@ void __init fpu__init_system_xstate(void)
+       unsigned int eax, ebx, ecx, edx;
+       static int on_boot_cpu __initdata = 1;
+       int err;
++      int i;
+ 
+       WARN_ON_FPU(!on_boot_cpu);
+       on_boot_cpu = 0;
+@@ -735,6 +750,14 @@ void __init fpu__init_system_xstate(void)
+               goto out_disable;
+       }
+ 
++      /*
++       * Clear XSAVE features that are disabled in the normal CPUID.
++       */
++      for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
++              if (!boot_cpu_has(xsave_cpuid_features[i]))
++                      xfeatures_mask &= ~BIT(i);
++      }
++
+       xfeatures_mask &= fpu__get_supported_xfeatures_mask();
+ 
+       /* Enable xstate instructions to be able to continue with initialization: */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0071-x86-fpu-Remove-the-explicit-clearing-of-XSAVE-depend.patch b/patches/kernel/0071-x86-fpu-Remove-the-explicit-clearing-of-XSAVE-depend.patch

deleted file mode 100644 (file)

index 91e271b..0000000
--- a/patches/kernel/0071-x86-fpu-Remove-the-explicit-clearing-of-XSAVE-depend.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andi Kleen <ak@linux.intel.com>
-Date: Fri, 13 Oct 2017 14:56:45 -0700
-Subject: [PATCH] x86/fpu: Remove the explicit clearing of XSAVE dependent
- features
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Clearing a CPU feature with setup_clear_cpu_cap() clears all features
-which depend on it. Expressing feature dependencies in one place is
-easier to maintain than keeping functions like
-fpu__xstate_clear_all_cpu_caps() up to date.
-
-The features which depend on XSAVE have their dependency expressed in the
-dependency table, so its sufficient to clear X86_FEATURE_XSAVE.
-
-Remove the explicit clearing of XSAVE dependent features.
-
-Signed-off-by: Andi Kleen <ak@linux.intel.com>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Link: http://lkml.kernel.org/r/20171013215645.23166-6-andi@firstfloor.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 73e3a7d2a7c3be29a5a22b85026f6cfa5664267f)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit af445f9ba8bb30b47ccb5247b8f5ba28c9f2be3e)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/fpu/xstate.c | 20 --------------------
- 1 file changed, 20 deletions(-)
-
-diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
-index 3abe85b08234..fd6882c42246 100644
---- a/arch/x86/kernel/fpu/xstate.c
-+++ b/arch/x86/kernel/fpu/xstate.c
-@@ -73,26 +73,6 @@ unsigned int fpu_user_xstate_size;
- void fpu__xstate_clear_all_cpu_caps(void)
- {
-       setup_clear_cpu_cap(X86_FEATURE_XSAVE);
--      setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
--      setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
--      setup_clear_cpu_cap(X86_FEATURE_XSAVES);
--      setup_clear_cpu_cap(X86_FEATURE_AVX);
--      setup_clear_cpu_cap(X86_FEATURE_AVX2);
--      setup_clear_cpu_cap(X86_FEATURE_AVX512F);
--      setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
--      setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
--      setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
--      setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
--      setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
--      setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
--      setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
--      setup_clear_cpu_cap(X86_FEATURE_MPX);
--      setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
--      setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
--      setup_clear_cpu_cap(X86_FEATURE_PKU);
--      setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
--      setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
--      setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
- }
- 
- /*
--- 
-2.14.2
-
diff --git a/patches/kernel/0072-x86-fpu-Remove-the-explicit-clearing-of-XSAVE-depend.patch b/patches/kernel/0072-x86-fpu-Remove-the-explicit-clearing-of-XSAVE-depend.patch

new file mode 100644 (file)

index 0000000..91e271b
--- /dev/null
+++ b/patches/kernel/0072-x86-fpu-Remove-the-explicit-clearing-of-XSAVE-depend.patch
@@ -0,0 +1,70 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andi Kleen <ak@linux.intel.com>
+Date: Fri, 13 Oct 2017 14:56:45 -0700
+Subject: [PATCH] x86/fpu: Remove the explicit clearing of XSAVE dependent
+ features
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Clearing a CPU feature with setup_clear_cpu_cap() clears all features
+which depend on it. Expressing feature dependencies in one place is
+easier to maintain than keeping functions like
+fpu__xstate_clear_all_cpu_caps() up to date.
+
+The features which depend on XSAVE have their dependency expressed in the
+dependency table, so it's sufficient to clear X86_FEATURE_XSAVE.
+
+Remove the explicit clearing of XSAVE dependent features.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/20171013215645.23166-6-andi@firstfloor.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 73e3a7d2a7c3be29a5a22b85026f6cfa5664267f)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit af445f9ba8bb30b47ccb5247b8f5ba28c9f2be3e)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/fpu/xstate.c | 20 --------------------
+ 1 file changed, 20 deletions(-)
+
+diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
+index 3abe85b08234..fd6882c42246 100644
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -73,26 +73,6 @@ unsigned int fpu_user_xstate_size;
+ void fpu__xstate_clear_all_cpu_caps(void)
+ {
+       setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+-      setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+-      setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
+-      setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+-      setup_clear_cpu_cap(X86_FEATURE_AVX);
+-      setup_clear_cpu_cap(X86_FEATURE_AVX2);
+-      setup_clear_cpu_cap(X86_FEATURE_AVX512F);
+-      setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
+-      setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
+-      setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
+-      setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
+-      setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
+-      setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
+-      setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
+-      setup_clear_cpu_cap(X86_FEATURE_MPX);
+-      setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
+-      setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
+-      setup_clear_cpu_cap(X86_FEATURE_PKU);
+-      setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+-      setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
+-      setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
+ }
+ 
+ /*
+-- 
+2.14.2
+
diff --git a/patches/kernel/0072-x86-platform-UV-Convert-timers-to-use-timer_setup.patch b/patches/kernel/0072-x86-platform-UV-Convert-timers-to-use-timer_setup.patch

deleted file mode 100644 (file)

index 02e2fb8..0000000
--- a/patches/kernel/0072-x86-platform-UV-Convert-timers-to-use-timer_setup.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Kees Cook <keescook@chromium.org>
-Date: Mon, 16 Oct 2017 16:22:31 -0700
-Subject: [PATCH] x86/platform/UV: Convert timers to use timer_setup()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-In preparation for unconditionally passing the struct timer_list pointer to
-all timer callbacks, switch to using the new timer_setup() and from_timer()
-to pass the timer pointer explicitly.
-
-Signed-off-by: Kees Cook <keescook@chromium.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Dimitri Sivanich <sivanich@hpe.com>
-Cc: Russ Anderson <rja@hpe.com>
-Cc: Mike Travis <mike.travis@hpe.com>
-Link: https://lkml.kernel.org/r/20171016232231.GA100493@beast
-
-(cherry picked from commit 376f3bcebdc999cc737d9052109cc33b573b3a8b)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 869cbd2b31024e70d574527b8c6851bf2ebbe483)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/apic/x2apic_uv_x.c | 5 ++---
- 1 file changed, 2 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
-index 0d57bb9079c9..c0b694810ff4 100644
---- a/arch/x86/kernel/apic/x2apic_uv_x.c
-+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
-@@ -920,9 +920,8 @@ static __init void uv_rtc_init(void)
- /*
-  * percpu heartbeat timer
-  */
--static void uv_heartbeat(unsigned long ignored)
-+static void uv_heartbeat(struct timer_list *timer)
- {
--      struct timer_list *timer = &uv_scir_info->timer;
-       unsigned char bits = uv_scir_info->state;
- 
-       /* Flip heartbeat bit: */
-@@ -947,7 +946,7 @@ static int uv_heartbeat_enable(unsigned int cpu)
-               struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
- 
-               uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
--              setup_pinned_timer(timer, uv_heartbeat, cpu);
-+              timer_setup(timer, uv_heartbeat, TIMER_PINNED);
-               timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
-               add_timer_on(timer, cpu);
-               uv_cpu_scir_info(cpu)->enabled = 1;
--- 
-2.14.2
-
diff --git a/patches/kernel/0073-objtool-Print-top-level-commands-on-incorrect-usage.patch b/patches/kernel/0073-objtool-Print-top-level-commands-on-incorrect-usage.patch

deleted file mode 100644 (file)

index 5c596ce..0000000
--- a/patches/kernel/0073-objtool-Print-top-level-commands-on-incorrect-usage.patch
+++ /dev/null
@@ -1,73 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
-Date: Sat, 14 Oct 2017 20:17:54 +0530
-Subject: [PATCH] objtool: Print top level commands on incorrect usage
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Print top-level objtool commands, along with the error on incorrect
-command line usage. Objtool command line parser exit's with code 129,
-for incorrect usage. Convert the cmd_usage() exit code also, to maintain
-consistency across objtool.
-
-After the patch:
-
-  $ ./objtool -j
-
-  Unknown option: -j
-
-  usage: objtool COMMAND [ARGS]
-
-  Commands:
-     check   Perform stack metadata validation on an object file
-     orc     Generate in-place ORC unwind tables for an object file
-
-  $ echo $?
-  129
-
-Signed-off-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
-Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/1507992474-16142-1-git-send-email-kamalesh@linux.vnet.ibm.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 6a93bb7e4a7d6670677d5b0eb980936eb9cc5d2e)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit cd75c9c55a5f288e1d3f20c48c5c4c2caf3966e8)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- tools/objtool/objtool.c | 6 ++----
- 1 file changed, 2 insertions(+), 4 deletions(-)
-
-diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
-index 31e0f9143840..07f329919828 100644
---- a/tools/objtool/objtool.c
-+++ b/tools/objtool/objtool.c
-@@ -70,7 +70,7 @@ static void cmd_usage(void)
- 
-       printf("\n");
- 
--      exit(1);
-+      exit(129);
- }
- 
- static void handle_options(int *argc, const char ***argv)
-@@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv)
-                       break;
-               } else {
-                       fprintf(stderr, "Unknown option: %s\n", cmd);
--                      fprintf(stderr, "\n Usage: %s\n",
--                              objtool_usage_string);
--                      exit(1);
-+                      cmd_usage();
-               }
- 
-               (*argv)++;
--- 
-2.14.2
-
diff --git a/patches/kernel/0073-x86-platform-UV-Convert-timers-to-use-timer_setup.patch b/patches/kernel/0073-x86-platform-UV-Convert-timers-to-use-timer_setup.patch

new file mode 100644 (file)

index 0000000..02e2fb8
--- /dev/null
+++ b/patches/kernel/0073-x86-platform-UV-Convert-timers-to-use-timer_setup.patch
@@ -0,0 +1,57 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Mon, 16 Oct 2017 16:22:31 -0700
+Subject: [PATCH] x86/platform/UV: Convert timers to use timer_setup()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+In preparation for unconditionally passing the struct timer_list pointer to
+all timer callbacks, switch to using the new timer_setup() and from_timer()
+to pass the timer pointer explicitly.
+
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Dimitri Sivanich <sivanich@hpe.com>
+Cc: Russ Anderson <rja@hpe.com>
+Cc: Mike Travis <mike.travis@hpe.com>
+Link: https://lkml.kernel.org/r/20171016232231.GA100493@beast
+
+(cherry picked from commit 376f3bcebdc999cc737d9052109cc33b573b3a8b)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 869cbd2b31024e70d574527b8c6851bf2ebbe483)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/apic/x2apic_uv_x.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
+index 0d57bb9079c9..c0b694810ff4 100644
+--- a/arch/x86/kernel/apic/x2apic_uv_x.c
++++ b/arch/x86/kernel/apic/x2apic_uv_x.c
+@@ -920,9 +920,8 @@ static __init void uv_rtc_init(void)
+ /*
+  * percpu heartbeat timer
+  */
+-static void uv_heartbeat(unsigned long ignored)
++static void uv_heartbeat(struct timer_list *timer)
+ {
+-      struct timer_list *timer = &uv_scir_info->timer;
+       unsigned char bits = uv_scir_info->state;
+ 
+       /* Flip heartbeat bit: */
+@@ -947,7 +946,7 @@ static int uv_heartbeat_enable(unsigned int cpu)
+               struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
+ 
+               uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
+-              setup_pinned_timer(timer, uv_heartbeat, cpu);
++              timer_setup(timer, uv_heartbeat, TIMER_PINNED);
+               timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
+               add_timer_on(timer, cpu);
+               uv_cpu_scir_info(cpu)->enabled = 1;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0074-objtool-Print-top-level-commands-on-incorrect-usage.patch b/patches/kernel/0074-objtool-Print-top-level-commands-on-incorrect-usage.patch

new file mode 100644 (file)

index 0000000..5c596ce
--- /dev/null
+++ b/patches/kernel/0074-objtool-Print-top-level-commands-on-incorrect-usage.patch
@@ -0,0 +1,73 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
+Date: Sat, 14 Oct 2017 20:17:54 +0530
+Subject: [PATCH] objtool: Print top level commands on incorrect usage
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Print top-level objtool commands, along with the error on incorrect
+command line usage. Objtool command line parser exits with code 129,
+for incorrect usage. Convert the cmd_usage() exit code also, to maintain
+consistency across objtool.
+
+After the patch:
+
+  $ ./objtool -j
+
+  Unknown option: -j
+
+  usage: objtool COMMAND [ARGS]
+
+  Commands:
+     check   Perform stack metadata validation on an object file
+     orc     Generate in-place ORC unwind tables for an object file
+
+  $ echo $?
+  129
+
+Signed-off-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
+Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1507992474-16142-1-git-send-email-kamalesh@linux.vnet.ibm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 6a93bb7e4a7d6670677d5b0eb980936eb9cc5d2e)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit cd75c9c55a5f288e1d3f20c48c5c4c2caf3966e8)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ tools/objtool/objtool.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
+index 31e0f9143840..07f329919828 100644
+--- a/tools/objtool/objtool.c
++++ b/tools/objtool/objtool.c
+@@ -70,7 +70,7 @@ static void cmd_usage(void)
+ 
+       printf("\n");
+ 
+-      exit(1);
++      exit(129);
+ }
+ 
+ static void handle_options(int *argc, const char ***argv)
+@@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv)
+                       break;
+               } else {
+                       fprintf(stderr, "Unknown option: %s\n", cmd);
+-                      fprintf(stderr, "\n Usage: %s\n",
+-                              objtool_usage_string);
+-                      exit(1);
++                      cmd_usage();
+               }
+ 
+               (*argv)++;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0074-x86-cpuid-Prevent-out-of-bound-access-in-do_clear_cp.patch b/patches/kernel/0074-x86-cpuid-Prevent-out-of-bound-access-in-do_clear_cp.patch

deleted file mode 100644 (file)

index aad4047..0000000
--- a/patches/kernel/0074-x86-cpuid-Prevent-out-of-bound-access-in-do_clear_cp.patch
+++ /dev/null
@@ -1,64 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 18 Oct 2017 19:39:35 +0200
-Subject: [PATCH] x86/cpuid: Prevent out of bound access in do_clear_cpu_cap()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-do_clear_cpu_cap() allocates a bitmap to keep track of disabled feature
-dependencies. That bitmap is sized NCAPINTS * BITS_PER_INIT. The possible
-'features' which can be handed in are larger than this, because after the
-capabilities the bug 'feature' bits occupy another 32bit. Not really
-obvious...
-
-So clearing any of the misfeature bits, as 32bit does for the F00F bug,
-accesses that bitmap out of bounds thereby corrupting the stack.
-
-Size the bitmap proper and add a sanity check to catch accidental out of
-bound access.
-
-Fixes: 0b00de857a64 ("x86/cpuid: Add generic table for CPUID dependencies")
-Reported-by: kernel test robot <xiaolong.ye@intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andi Kleen <ak@linux.intel.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Link: https://lkml.kernel.org/r/20171018022023.GA12058@yexl-desktop
-(cherry picked from commit 57b8b1a1856adaa849d02d547411a553a531022b)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 4b3a90bd20b35a97fd9ca6f6a71131f4417782e4)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/cpu/cpuid-deps.c | 10 ++++++++--
- 1 file changed, 8 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
-index e48eb7313120..c1d49842a411 100644
---- a/arch/x86/kernel/cpu/cpuid-deps.c
-+++ b/arch/x86/kernel/cpu/cpuid-deps.c
-@@ -75,11 +75,17 @@ static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
-               __clear_cpu_cap(c, feature);
- }
- 
-+/* Take the capabilities and the BUG bits into account */
-+#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
-+
- static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
- {
--      bool changed;
--      DECLARE_BITMAP(disable, NCAPINTS * sizeof(u32) * 8);
-+      DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
-       const struct cpuid_dep *d;
-+      bool changed;
-+
-+      if (WARN_ON(feature >= MAX_FEATURE_BITS))
-+              return;
- 
-       clear_feature(c, feature);
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0075-x86-cpuid-Prevent-out-of-bound-access-in-do_clear_cp.patch b/patches/kernel/0075-x86-cpuid-Prevent-out-of-bound-access-in-do_clear_cp.patch

new file mode 100644 (file)

index 0000000..aad4047
--- /dev/null
+++ b/patches/kernel/0075-x86-cpuid-Prevent-out-of-bound-access-in-do_clear_cp.patch
@@ -0,0 +1,64 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 18 Oct 2017 19:39:35 +0200
+Subject: [PATCH] x86/cpuid: Prevent out of bound access in do_clear_cpu_cap()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+do_clear_cpu_cap() allocates a bitmap to keep track of disabled feature
+dependencies. That bitmap is sized NCAPINTS * 32 bits. The possible
+'features' which can be handed in are larger than this, because after the
+capabilities the bug 'feature' bits occupy another 32bit. Not really
+obvious...
+
+So clearing any of the misfeature bits, as 32bit does for the F00F bug,
+accesses that bitmap out of bounds thereby corrupting the stack.
+
+Size the bitmap properly and add a sanity check to catch accidental
+out-of-bounds access.
+
+Fixes: 0b00de857a64 ("x86/cpuid: Add generic table for CPUID dependencies")
+Reported-by: kernel test robot <xiaolong.ye@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Link: https://lkml.kernel.org/r/20171018022023.GA12058@yexl-desktop
+(cherry picked from commit 57b8b1a1856adaa849d02d547411a553a531022b)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 4b3a90bd20b35a97fd9ca6f6a71131f4417782e4)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/cpu/cpuid-deps.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
+index e48eb7313120..c1d49842a411 100644
+--- a/arch/x86/kernel/cpu/cpuid-deps.c
++++ b/arch/x86/kernel/cpu/cpuid-deps.c
+@@ -75,11 +75,17 @@ static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
+               __clear_cpu_cap(c, feature);
+ }
+ 
++/* Take the capabilities and the BUG bits into account */
++#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
++
+ static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+ {
+-      bool changed;
+-      DECLARE_BITMAP(disable, NCAPINTS * sizeof(u32) * 8);
++      DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
+       const struct cpuid_dep *d;
++      bool changed;
++
++      if (WARN_ON(feature >= MAX_FEATURE_BITS))
++              return;
+ 
+       clear_feature(c, feature);
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0075-x86-entry-Use-SYSCALL_DEFINE-macros-for-sys_modify_l.patch b/patches/kernel/0075-x86-entry-Use-SYSCALL_DEFINE-macros-for-sys_modify_l.patch

deleted file mode 100644 (file)

index af37b4e..0000000
--- a/patches/kernel/0075-x86-entry-Use-SYSCALL_DEFINE-macros-for-sys_modify_l.patch
+++ /dev/null
@@ -1,124 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Wed, 18 Oct 2017 10:21:07 -0700
-Subject: [PATCH] x86/entry: Use SYSCALL_DEFINE() macros for sys_modify_ldt()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-We do not have tracepoints for sys_modify_ldt() because we define
-it directly instead of using the normal SYSCALL_DEFINEx() macros.
-
-However, there is a reason sys_modify_ldt() does not use the macros:
-it has an 'int' return type instead of 'unsigned long'.  This is
-a bug, but it's a bug cemented in the ABI.
-
-What does this mean?  If we return -EINVAL from a function that
-returns 'int', we have 0x00000000ffffffea in %rax.  But, if we
-return -EINVAL from a function returning 'unsigned long', we end
-up with 0xffffffffffffffea in %rax, which is wrong.
-
-To work around this and maintain the 'int' behavior while using
-the SYSCALL_DEFINEx() macros, so we add a cast to 'unsigned int'
-in both implementations of sys_modify_ldt().
-
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Reviewed-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Brian Gerst <brgerst@gmail.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/20171018172107.1A79C532@viggo.jf.intel.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit da20ab35180780e4a6eadc804544f1fa967f3567)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d865f635f4b2c3307e79de9be5c49ea8bd4c43a6)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/syscalls.h |  2 +-
- arch/x86/kernel/ldt.c           | 16 +++++++++++++---
- arch/x86/um/ldt.c               |  7 +++++--
- 3 files changed, 19 insertions(+), 6 deletions(-)
-
-diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
-index 91dfcafe27a6..bad25bb80679 100644
---- a/arch/x86/include/asm/syscalls.h
-+++ b/arch/x86/include/asm/syscalls.h
-@@ -21,7 +21,7 @@ asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
- asmlinkage long sys_iopl(unsigned int);
- 
- /* kernel/ldt.c */
--asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
-+asmlinkage long sys_modify_ldt(int, void __user *, unsigned long);
- 
- /* kernel/signal.c */
- asmlinkage long sys_rt_sigreturn(void);
-diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
-index f0e64db18ac8..0402d44deb4d 100644
---- a/arch/x86/kernel/ldt.c
-+++ b/arch/x86/kernel/ldt.c
-@@ -12,6 +12,7 @@
- #include <linux/string.h>
- #include <linux/mm.h>
- #include <linux/smp.h>
-+#include <linux/syscalls.h>
- #include <linux/slab.h>
- #include <linux/vmalloc.h>
- #include <linux/uaccess.h>
-@@ -294,8 +295,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
-       return error;
- }
- 
--asmlinkage int sys_modify_ldt(int func, void __user *ptr,
--                            unsigned long bytecount)
-+SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
-+              unsigned long , bytecount)
- {
-       int ret = -ENOSYS;
- 
-@@ -313,5 +314,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr,
-               ret = write_ldt(ptr, bytecount, 0);
-               break;
-       }
--      return ret;
-+      /*
-+       * The SYSCALL_DEFINE() macros give us an 'unsigned long'
-+       * return type, but tht ABI for sys_modify_ldt() expects
-+       * 'int'.  This cast gives us an int-sized value in %rax
-+       * for the return code.  The 'unsigned' is necessary so
-+       * the compiler does not try to sign-extend the negative
-+       * return codes into the high half of the register when
-+       * taking the value from int->long.
-+       */
-+      return (unsigned int)ret;
- }
-diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
-index 836a1eb5df43..3ee234b6234d 100644
---- a/arch/x86/um/ldt.c
-+++ b/arch/x86/um/ldt.c
-@@ -6,6 +6,7 @@
- #include <linux/mm.h>
- #include <linux/sched.h>
- #include <linux/slab.h>
-+#include <linux/syscalls.h>
- #include <linux/uaccess.h>
- #include <asm/unistd.h>
- #include <os.h>
-@@ -369,7 +370,9 @@ void free_ldt(struct mm_context *mm)
-       mm->arch.ldt.entry_count = 0;
- }
- 
--int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
-+SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
-+              unsigned long , bytecount)
- {
--      return do_modify_ldt_skas(func, ptr, bytecount);
-+      /* See non-um modify_ldt() for why we do this cast */
-+      return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount);
- }
--- 
-2.14.2
-
diff --git a/patches/kernel/0076-mm-sparsemem-Allocate-mem_section-at-runtime-for-CON.patch b/patches/kernel/0076-mm-sparsemem-Allocate-mem_section-at-runtime-for-CON.patch

deleted file mode 100644 (file)

index d0045b1..0000000
--- a/patches/kernel/0076-mm-sparsemem-Allocate-mem_section-at-runtime-for-CON.patch
+++ /dev/null
@@ -1,141 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
-Date: Fri, 29 Sep 2017 17:08:16 +0300
-Subject: [PATCH] mm/sparsemem: Allocate mem_section at runtime for
- CONFIG_SPARSEMEM_EXTREME=y
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Size of the mem_section[] array depends on the size of the physical address space.
-
-In preparation for boot-time switching between paging modes on x86-64
-we need to make the allocation of mem_section[] dynamic, because otherwise
-we waste a lot of RAM: with CONFIG_NODE_SHIFT=10, mem_section[] size is 32kB
-for 4-level paging and 2MB for 5-level paging mode.
-
-The patch allocates the array on the first call to sparse_memory_present_with_active_regions().
-
-Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Andy Lutomirski <luto@amacapital.net>
-Cc: Borislav Petkov <bp@suse.de>
-Cc: Cyrill Gorcunov <gorcunov@openvz.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-Link: http://lkml.kernel.org/r/20170929140821.37654-2-kirill.shutemov@linux.intel.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 83e3c48729d9ebb7af5a31a504f3fd6aff0348c4)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit c70f71e01a0ae5d884abae0424618abe90b82011)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- include/linux/mmzone.h |  6 +++++-
- mm/page_alloc.c        | 10 ++++++++++
- mm/sparse.c            | 17 +++++++++++------
- 3 files changed, 26 insertions(+), 7 deletions(-)
-
-diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
-index fc14b8b3f6ce..9c6c001a8c6c 100644
---- a/include/linux/mmzone.h
-+++ b/include/linux/mmzone.h
-@@ -1137,13 +1137,17 @@ struct mem_section {
- #define SECTION_ROOT_MASK     (SECTIONS_PER_ROOT - 1)
- 
- #ifdef CONFIG_SPARSEMEM_EXTREME
--extern struct mem_section *mem_section[NR_SECTION_ROOTS];
-+extern struct mem_section **mem_section;
- #else
- extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
- #endif
- 
- static inline struct mem_section *__nr_to_section(unsigned long nr)
- {
-+#ifdef CONFIG_SPARSEMEM_EXTREME
-+      if (!mem_section)
-+              return NULL;
-+#endif
-       if (!mem_section[SECTION_NR_TO_ROOT(nr)])
-               return NULL;
-       return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
-diff --git a/mm/page_alloc.c b/mm/page_alloc.c
-index 1423da8dd16f..66eb23ab658d 100644
---- a/mm/page_alloc.c
-+++ b/mm/page_alloc.c
-@@ -5707,6 +5707,16 @@ void __init sparse_memory_present_with_active_regions(int nid)
-       unsigned long start_pfn, end_pfn;
-       int i, this_nid;
- 
-+#ifdef CONFIG_SPARSEMEM_EXTREME
-+      if (!mem_section) {
-+              unsigned long size, align;
-+
-+              size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
-+              align = 1 << (INTERNODE_CACHE_SHIFT);
-+              mem_section = memblock_virt_alloc(size, align);
-+      }
-+#endif
-+
-       for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
-               memory_present(this_nid, start_pfn, end_pfn);
- }
-diff --git a/mm/sparse.c b/mm/sparse.c
-index cdce7a7bb3f3..308a0789d1bb 100644
---- a/mm/sparse.c
-+++ b/mm/sparse.c
-@@ -22,8 +22,7 @@
-  * 1) mem_section     - memory sections, mem_map's for valid memory
-  */
- #ifdef CONFIG_SPARSEMEM_EXTREME
--struct mem_section *mem_section[NR_SECTION_ROOTS]
--      ____cacheline_internodealigned_in_smp;
-+struct mem_section **mem_section;
- #else
- struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
-       ____cacheline_internodealigned_in_smp;
-@@ -104,7 +103,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
- int __section_nr(struct mem_section* ms)
- {
-       unsigned long root_nr;
--      struct mem_section* root;
-+      struct mem_section *root = NULL;
- 
-       for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
-               root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
-@@ -115,7 +114,7 @@ int __section_nr(struct mem_section* ms)
-                    break;
-       }
- 
--      VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
-+      VM_BUG_ON(!root);
- 
-       return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
- }
-@@ -333,11 +332,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
- static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
- {
-       unsigned long usemap_snr, pgdat_snr;
--      static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
--      static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
-+      static unsigned long old_usemap_snr;
-+      static unsigned long old_pgdat_snr;
-       struct pglist_data *pgdat = NODE_DATA(nid);
-       int usemap_nid;
- 
-+      /* First call */
-+      if (!old_usemap_snr) {
-+              old_usemap_snr = NR_MEM_SECTIONS;
-+              old_pgdat_snr = NR_MEM_SECTIONS;
-+      }
-+
-       usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
-       pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
-       if (usemap_snr == pgdat_snr)
--- 
-2.14.2
-
diff --git a/patches/kernel/0076-x86-entry-Use-SYSCALL_DEFINE-macros-for-sys_modify_l.patch b/patches/kernel/0076-x86-entry-Use-SYSCALL_DEFINE-macros-for-sys_modify_l.patch

new file mode 100644 (file)

index 0000000..af37b4e
--- /dev/null
+++ b/patches/kernel/0076-x86-entry-Use-SYSCALL_DEFINE-macros-for-sys_modify_l.patch
@@ -0,0 +1,124 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Wed, 18 Oct 2017 10:21:07 -0700
+Subject: [PATCH] x86/entry: Use SYSCALL_DEFINE() macros for sys_modify_ldt()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+We do not have tracepoints for sys_modify_ldt() because we define
+it directly instead of using the normal SYSCALL_DEFINEx() macros.
+
+However, there is a reason sys_modify_ldt() does not use the macros:
+it has an 'int' return type instead of 'unsigned long'.  This is
+a bug, but it's a bug cemented in the ABI.
+
+What does this mean?  If we return -EINVAL from a function that
+returns 'int', we have 0x00000000ffffffea in %rax.  But, if we
+return -EINVAL from a function returning 'unsigned long', we end
+up with 0xffffffffffffffea in %rax, which is wrong.
+
+To work around this and maintain the 'int' behavior while using
+the SYSCALL_DEFINEx() macros, we add a cast to 'unsigned int'
+in both implementations of sys_modify_ldt().
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Reviewed-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Brian Gerst <brgerst@gmail.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20171018172107.1A79C532@viggo.jf.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit da20ab35180780e4a6eadc804544f1fa967f3567)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d865f635f4b2c3307e79de9be5c49ea8bd4c43a6)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/syscalls.h |  2 +-
+ arch/x86/kernel/ldt.c           | 16 +++++++++++++---
+ arch/x86/um/ldt.c               |  7 +++++--
+ 3 files changed, 19 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
+index 91dfcafe27a6..bad25bb80679 100644
+--- a/arch/x86/include/asm/syscalls.h
++++ b/arch/x86/include/asm/syscalls.h
+@@ -21,7 +21,7 @@ asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
+ asmlinkage long sys_iopl(unsigned int);
+ 
+ /* kernel/ldt.c */
+-asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
++asmlinkage long sys_modify_ldt(int, void __user *, unsigned long);
+ 
+ /* kernel/signal.c */
+ asmlinkage long sys_rt_sigreturn(void);
+diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
+index f0e64db18ac8..0402d44deb4d 100644
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -12,6 +12,7 @@
+ #include <linux/string.h>
+ #include <linux/mm.h>
+ #include <linux/smp.h>
++#include <linux/syscalls.h>
+ #include <linux/slab.h>
+ #include <linux/vmalloc.h>
+ #include <linux/uaccess.h>
+@@ -294,8 +295,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
+       return error;
+ }
+ 
+-asmlinkage int sys_modify_ldt(int func, void __user *ptr,
+-                            unsigned long bytecount)
++SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
++              unsigned long , bytecount)
+ {
+       int ret = -ENOSYS;
+ 
+@@ -313,5 +314,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr,
+               ret = write_ldt(ptr, bytecount, 0);
+               break;
+       }
+-      return ret;
++      /*
++       * The SYSCALL_DEFINE() macros give us an 'unsigned long'
++       * return type, but the ABI for sys_modify_ldt() expects
++       * 'int'.  This cast gives us an int-sized value in %rax
++       * for the return code.  The 'unsigned' is necessary so
++       * the compiler does not try to sign-extend the negative
++       * return codes into the high half of the register when
++       * taking the value from int->long.
++       */
++      return (unsigned int)ret;
+ }
+diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
+index 836a1eb5df43..3ee234b6234d 100644
+--- a/arch/x86/um/ldt.c
++++ b/arch/x86/um/ldt.c
+@@ -6,6 +6,7 @@
+ #include <linux/mm.h>
+ #include <linux/sched.h>
+ #include <linux/slab.h>
++#include <linux/syscalls.h>
+ #include <linux/uaccess.h>
+ #include <asm/unistd.h>
+ #include <os.h>
+@@ -369,7 +370,9 @@ void free_ldt(struct mm_context *mm)
+       mm->arch.ldt.entry_count = 0;
+ }
+ 
+-int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
++SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
++              unsigned long , bytecount)
+ {
+-      return do_modify_ldt_skas(func, ptr, bytecount);
++      /* See non-um modify_ldt() for why we do this cast */
++      return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount);
+ }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0077-mm-sparsemem-Allocate-mem_section-at-runtime-for-CON.patch b/patches/kernel/0077-mm-sparsemem-Allocate-mem_section-at-runtime-for-CON.patch

new file mode 100644 (file)

index 0000000..d0045b1
--- /dev/null
+++ b/patches/kernel/0077-mm-sparsemem-Allocate-mem_section-at-runtime-for-CON.patch
@@ -0,0 +1,141 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Fri, 29 Sep 2017 17:08:16 +0300
+Subject: [PATCH] mm/sparsemem: Allocate mem_section at runtime for
+ CONFIG_SPARSEMEM_EXTREME=y
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Size of the mem_section[] array depends on the size of the physical address space.
+
+In preparation for boot-time switching between paging modes on x86-64
+we need to make the allocation of mem_section[] dynamic, because otherwise
+we waste a lot of RAM: with CONFIG_NODE_SHIFT=10, mem_section[] size is 32kB
+for 4-level paging and 2MB for 5-level paging mode.
+
+The patch allocates the array on the first call to sparse_memory_present_with_active_regions().
+
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/20170929140821.37654-2-kirill.shutemov@linux.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 83e3c48729d9ebb7af5a31a504f3fd6aff0348c4)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit c70f71e01a0ae5d884abae0424618abe90b82011)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ include/linux/mmzone.h |  6 +++++-
+ mm/page_alloc.c        | 10 ++++++++++
+ mm/sparse.c            | 17 +++++++++++------
+ 3 files changed, 26 insertions(+), 7 deletions(-)
+
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
+index fc14b8b3f6ce..9c6c001a8c6c 100644
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -1137,13 +1137,17 @@ struct mem_section {
+ #define SECTION_ROOT_MASK     (SECTIONS_PER_ROOT - 1)
+ 
+ #ifdef CONFIG_SPARSEMEM_EXTREME
+-extern struct mem_section *mem_section[NR_SECTION_ROOTS];
++extern struct mem_section **mem_section;
+ #else
+ extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
+ #endif
+ 
+ static inline struct mem_section *__nr_to_section(unsigned long nr)
+ {
++#ifdef CONFIG_SPARSEMEM_EXTREME
++      if (!mem_section)
++              return NULL;
++#endif
+       if (!mem_section[SECTION_NR_TO_ROOT(nr)])
+               return NULL;
+       return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 1423da8dd16f..66eb23ab658d 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -5707,6 +5707,16 @@ void __init sparse_memory_present_with_active_regions(int nid)
+       unsigned long start_pfn, end_pfn;
+       int i, this_nid;
+ 
++#ifdef CONFIG_SPARSEMEM_EXTREME
++      if (!mem_section) {
++              unsigned long size, align;
++
++              size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
++              align = 1 << (INTERNODE_CACHE_SHIFT);
++              mem_section = memblock_virt_alloc(size, align);
++      }
++#endif
++
+       for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
+               memory_present(this_nid, start_pfn, end_pfn);
+ }
+diff --git a/mm/sparse.c b/mm/sparse.c
+index cdce7a7bb3f3..308a0789d1bb 100644
+--- a/mm/sparse.c
++++ b/mm/sparse.c
+@@ -22,8 +22,7 @@
+  * 1) mem_section     - memory sections, mem_map's for valid memory
+  */
+ #ifdef CONFIG_SPARSEMEM_EXTREME
+-struct mem_section *mem_section[NR_SECTION_ROOTS]
+-      ____cacheline_internodealigned_in_smp;
++struct mem_section **mem_section;
+ #else
+ struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
+       ____cacheline_internodealigned_in_smp;
+@@ -104,7 +103,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
+ int __section_nr(struct mem_section* ms)
+ {
+       unsigned long root_nr;
+-      struct mem_section* root;
++      struct mem_section *root = NULL;
+ 
+       for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
+               root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
+@@ -115,7 +114,7 @@ int __section_nr(struct mem_section* ms)
+                    break;
+       }
+ 
+-      VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
++      VM_BUG_ON(!root);
+ 
+       return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
+ }
+@@ -333,11 +332,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
+ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+ {
+       unsigned long usemap_snr, pgdat_snr;
+-      static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
+-      static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
++      static unsigned long old_usemap_snr;
++      static unsigned long old_pgdat_snr;
+       struct pglist_data *pgdat = NODE_DATA(nid);
+       int usemap_nid;
+ 
++      /* First call */
++      if (!old_usemap_snr) {
++              old_usemap_snr = NR_MEM_SECTIONS;
++              old_pgdat_snr = NR_MEM_SECTIONS;
++      }
++
+       usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
+       pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
+       if (usemap_snr == pgdat_snr)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0077-x86-kasan-Use-the-same-shadow-offset-for-4-and-5-lev.patch b/patches/kernel/0077-x86-kasan-Use-the-same-shadow-offset-for-4-and-5-lev.patch

deleted file mode 100644 (file)

index ddf50de..0000000
--- a/patches/kernel/0077-x86-kasan-Use-the-same-shadow-offset-for-4-and-5-lev.patch
+++ /dev/null
@@ -1,244 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andrey Ryabinin <aryabinin@virtuozzo.com>
-Date: Fri, 29 Sep 2017 17:08:18 +0300
-Subject: [PATCH] x86/kasan: Use the same shadow offset for 4- and 5-level
- paging
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-We are going to support boot-time switching between 4- and 5-level
-paging. For KASAN it means we cannot have different KASAN_SHADOW_OFFSET
-for different paging modes: the constant is passed to gcc to generate
-code and cannot be changed at runtime.
-
-This patch changes KASAN code to use 0xdffffc0000000000 as shadow offset
-for both 4- and 5-level paging.
-
-For 5-level paging it means that shadow memory region is not aligned to
-PGD boundary anymore and we have to handle unaligned parts of the region
-properly.
-
-In addition, we have to exclude paravirt code from KASAN instrumentation
-as we now use set_pgd() before KASAN is fully ready.
-
-[kirill.shutemov@linux.intel.com: clenaup, changelog message]
-Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
-Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Andy Lutomirski <luto@amacapital.net>
-Cc: Borislav Petkov <bp@suse.de>
-Cc: Cyrill Gorcunov <gorcunov@openvz.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-Link: http://lkml.kernel.org/r/20170929140821.37654-4-kirill.shutemov@linux.intel.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 12a8cc7fcf54a8575f094be1e99032ec38aa045c)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 2ce428150e002623aa0ed2a1ab840fde5f860f32)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/x86/x86_64/mm.txt |   2 +-
- arch/x86/kernel/Makefile        |   3 +-
- arch/x86/mm/kasan_init_64.c     | 101 +++++++++++++++++++++++++++++++---------
- arch/x86/Kconfig                |   1 -
- 4 files changed, 83 insertions(+), 24 deletions(-)
-
-diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
-index b0798e281aa6..3448e675b462 100644
---- a/Documentation/x86/x86_64/mm.txt
-+++ b/Documentation/x86/x86_64/mm.txt
-@@ -34,7 +34,7 @@ ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
- ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
- ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
- ... unused hole ...
--ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
-+ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
- ... unused hole ...
- ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
- ... unused hole ...
-diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
-index 5bf0d5a473b4..aa059806201d 100644
---- a/arch/x86/kernel/Makefile
-+++ b/arch/x86/kernel/Makefile
-@@ -24,7 +24,8 @@ endif
- KASAN_SANITIZE_head$(BITS).o                          := n
- KASAN_SANITIZE_dumpstack.o                            := n
- KASAN_SANITIZE_dumpstack_$(BITS).o                    := n
--KASAN_SANITIZE_stacktrace.o := n
-+KASAN_SANITIZE_stacktrace.o                           := n
-+KASAN_SANITIZE_paravirt.o                             := n
- 
- OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o   := y
- OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o            := y
-diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
-index 02c9d7553409..464089f33e80 100644
---- a/arch/x86/mm/kasan_init_64.c
-+++ b/arch/x86/mm/kasan_init_64.c
-@@ -15,6 +15,8 @@
- extern pgd_t early_top_pgt[PTRS_PER_PGD];
- extern struct range pfn_mapped[E820_MAX_ENTRIES];
- 
-+static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
-+
- static int __init map_range(struct range *range)
- {
-       unsigned long start;
-@@ -30,8 +32,10 @@ static void __init clear_pgds(unsigned long start,
-                       unsigned long end)
- {
-       pgd_t *pgd;
-+      /* See comment in kasan_init() */
-+      unsigned long pgd_end = end & PGDIR_MASK;
- 
--      for (; start < end; start += PGDIR_SIZE) {
-+      for (; start < pgd_end; start += PGDIR_SIZE) {
-               pgd = pgd_offset_k(start);
-               /*
-                * With folded p4d, pgd_clear() is nop, use p4d_clear()
-@@ -42,29 +46,61 @@ static void __init clear_pgds(unsigned long start,
-               else
-                       pgd_clear(pgd);
-       }
-+
-+      pgd = pgd_offset_k(start);
-+      for (; start < end; start += P4D_SIZE)
-+              p4d_clear(p4d_offset(pgd, start));
-+}
-+
-+static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
-+{
-+      unsigned long p4d;
-+
-+      if (!IS_ENABLED(CONFIG_X86_5LEVEL))
-+              return (p4d_t *)pgd;
-+
-+      p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
-+      p4d += __START_KERNEL_map - phys_base;
-+      return (p4d_t *)p4d + p4d_index(addr);
-+}
-+
-+static void __init kasan_early_p4d_populate(pgd_t *pgd,
-+              unsigned long addr,
-+              unsigned long end)
-+{
-+      pgd_t pgd_entry;
-+      p4d_t *p4d, p4d_entry;
-+      unsigned long next;
-+
-+      if (pgd_none(*pgd)) {
-+              pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d));
-+              set_pgd(pgd, pgd_entry);
-+      }
-+
-+      p4d = early_p4d_offset(pgd, addr);
-+      do {
-+              next = p4d_addr_end(addr, end);
-+
-+              if (!p4d_none(*p4d))
-+                      continue;
-+
-+              p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud));
-+              set_p4d(p4d, p4d_entry);
-+      } while (p4d++, addr = next, addr != end && p4d_none(*p4d));
- }
- 
- static void __init kasan_map_early_shadow(pgd_t *pgd)
- {
--      int i;
--      unsigned long start = KASAN_SHADOW_START;
-+      /* See comment in kasan_init() */
-+      unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
-       unsigned long end = KASAN_SHADOW_END;
-+      unsigned long next;
- 
--      for (i = pgd_index(start); start < end; i++) {
--              switch (CONFIG_PGTABLE_LEVELS) {
--              case 4:
--                      pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) |
--                                      _KERNPG_TABLE);
--                      break;
--              case 5:
--                      pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) |
--                                      _KERNPG_TABLE);
--                      break;
--              default:
--                      BUILD_BUG();
--              }
--              start += PGDIR_SIZE;
--      }
-+      pgd += pgd_index(addr);
-+      do {
-+              next = pgd_addr_end(addr, end);
-+              kasan_early_p4d_populate(pgd, addr, next);
-+      } while (pgd++, addr = next, addr != end);
- }
- 
- #ifdef CONFIG_KASAN_INLINE
-@@ -101,7 +137,7 @@ void __init kasan_early_init(void)
-       for (i = 0; i < PTRS_PER_PUD; i++)
-               kasan_zero_pud[i] = __pud(pud_val);
- 
--      for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++)
-+      for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
-               kasan_zero_p4d[i] = __p4d(p4d_val);
- 
-       kasan_map_early_shadow(early_top_pgt);
-@@ -117,12 +153,35 @@ void __init kasan_init(void)
- #endif
- 
-       memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
-+
-+      /*
-+       * We use the same shadow offset for 4- and 5-level paging to
-+       * facilitate boot-time switching between paging modes.
-+       * As result in 5-level paging mode KASAN_SHADOW_START and
-+       * KASAN_SHADOW_END are not aligned to PGD boundary.
-+       *
-+       * KASAN_SHADOW_START doesn't share PGD with anything else.
-+       * We claim whole PGD entry to make things easier.
-+       *
-+       * KASAN_SHADOW_END lands in the last PGD entry and it collides with
-+       * bunch of things like kernel code, modules, EFI mapping, etc.
-+       * We need to take extra steps to not overwrite them.
-+       */
-+      if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-+              void *ptr;
-+
-+              ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
-+              memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
-+              set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
-+                              __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
-+      }
-+
-       load_cr3(early_top_pgt);
-       __flush_tlb_all();
- 
--      clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
-+      clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);
- 
--      kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
-+      kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
-                       kasan_mem_to_shadow((void *)PAGE_OFFSET));
- 
-       for (i = 0; i < E820_MAX_ENTRIES; i++) {
-diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
-index bf9f03740c30..67d07802ae95 100644
---- a/arch/x86/Kconfig
-+++ b/arch/x86/Kconfig
-@@ -300,7 +300,6 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
- config KASAN_SHADOW_OFFSET
-       hex
-       depends on KASAN
--      default 0xdff8000000000000 if X86_5LEVEL
-       default 0xdffffc0000000000
- 
- config HAVE_INTEL_TXT
--- 
-2.14.2
-
diff --git a/patches/kernel/0078-x86-kasan-Use-the-same-shadow-offset-for-4-and-5-lev.patch b/patches/kernel/0078-x86-kasan-Use-the-same-shadow-offset-for-4-and-5-lev.patch

new file mode 100644 (file)

index 0000000..ddf50de
--- /dev/null
+++ b/patches/kernel/0078-x86-kasan-Use-the-same-shadow-offset-for-4-and-5-lev.patch
@@ -0,0 +1,244 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Date: Fri, 29 Sep 2017 17:08:18 +0300
+Subject: [PATCH] x86/kasan: Use the same shadow offset for 4- and 5-level
+ paging
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+We are going to support boot-time switching between 4- and 5-level
+paging. For KASAN it means we cannot have different KASAN_SHADOW_OFFSET
+for different paging modes: the constant is passed to gcc to generate
+code and cannot be changed at runtime.
+
+This patch changes KASAN code to use 0xdffffc0000000000 as shadow offset
+for both 4- and 5-level paging.
+
+For 5-level paging it means that shadow memory region is not aligned to
+PGD boundary anymore and we have to handle unaligned parts of the region
+properly.
+
+In addition, we have to exclude paravirt code from KASAN instrumentation
+as we now use set_pgd() before KASAN is fully ready.
+
+[kirill.shutemov@linux.intel.com: cleanup, changelog message]
+Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/20170929140821.37654-4-kirill.shutemov@linux.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 12a8cc7fcf54a8575f094be1e99032ec38aa045c)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 2ce428150e002623aa0ed2a1ab840fde5f860f32)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/x86/x86_64/mm.txt |   2 +-
+ arch/x86/kernel/Makefile        |   3 +-
+ arch/x86/mm/kasan_init_64.c     | 101 +++++++++++++++++++++++++++++++---------
+ arch/x86/Kconfig                |   1 -
+ 4 files changed, 83 insertions(+), 24 deletions(-)
+
+diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
+index b0798e281aa6..3448e675b462 100644
+--- a/Documentation/x86/x86_64/mm.txt
++++ b/Documentation/x86/x86_64/mm.txt
+@@ -34,7 +34,7 @@ ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
+ ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
+ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
+ ... unused hole ...
+-ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
++ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
+ ... unused hole ...
+ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+ ... unused hole ...
+diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
+index 5bf0d5a473b4..aa059806201d 100644
+--- a/arch/x86/kernel/Makefile
++++ b/arch/x86/kernel/Makefile
+@@ -24,7 +24,8 @@ endif
+ KASAN_SANITIZE_head$(BITS).o                          := n
+ KASAN_SANITIZE_dumpstack.o                            := n
+ KASAN_SANITIZE_dumpstack_$(BITS).o                    := n
+-KASAN_SANITIZE_stacktrace.o := n
++KASAN_SANITIZE_stacktrace.o                           := n
++KASAN_SANITIZE_paravirt.o                             := n
+ 
+ OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o   := y
+ OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o            := y
+diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
+index 02c9d7553409..464089f33e80 100644
+--- a/arch/x86/mm/kasan_init_64.c
++++ b/arch/x86/mm/kasan_init_64.c
+@@ -15,6 +15,8 @@
+ extern pgd_t early_top_pgt[PTRS_PER_PGD];
+ extern struct range pfn_mapped[E820_MAX_ENTRIES];
+ 
++static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
++
+ static int __init map_range(struct range *range)
+ {
+       unsigned long start;
+@@ -30,8 +32,10 @@ static void __init clear_pgds(unsigned long start,
+                       unsigned long end)
+ {
+       pgd_t *pgd;
++      /* See comment in kasan_init() */
++      unsigned long pgd_end = end & PGDIR_MASK;
+ 
+-      for (; start < end; start += PGDIR_SIZE) {
++      for (; start < pgd_end; start += PGDIR_SIZE) {
+               pgd = pgd_offset_k(start);
+               /*
+                * With folded p4d, pgd_clear() is nop, use p4d_clear()
+@@ -42,29 +46,61 @@ static void __init clear_pgds(unsigned long start,
+               else
+                       pgd_clear(pgd);
+       }
++
++      pgd = pgd_offset_k(start);
++      for (; start < end; start += P4D_SIZE)
++              p4d_clear(p4d_offset(pgd, start));
++}
++
++static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
++{
++      unsigned long p4d;
++
++      if (!IS_ENABLED(CONFIG_X86_5LEVEL))
++              return (p4d_t *)pgd;
++
++      p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
++      p4d += __START_KERNEL_map - phys_base;
++      return (p4d_t *)p4d + p4d_index(addr);
++}
++
++static void __init kasan_early_p4d_populate(pgd_t *pgd,
++              unsigned long addr,
++              unsigned long end)
++{
++      pgd_t pgd_entry;
++      p4d_t *p4d, p4d_entry;
++      unsigned long next;
++
++      if (pgd_none(*pgd)) {
++              pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d));
++              set_pgd(pgd, pgd_entry);
++      }
++
++      p4d = early_p4d_offset(pgd, addr);
++      do {
++              next = p4d_addr_end(addr, end);
++
++              if (!p4d_none(*p4d))
++                      continue;
++
++              p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud));
++              set_p4d(p4d, p4d_entry);
++      } while (p4d++, addr = next, addr != end && p4d_none(*p4d));
+ }
+ 
+ static void __init kasan_map_early_shadow(pgd_t *pgd)
+ {
+-      int i;
+-      unsigned long start = KASAN_SHADOW_START;
++      /* See comment in kasan_init() */
++      unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
+       unsigned long end = KASAN_SHADOW_END;
++      unsigned long next;
+ 
+-      for (i = pgd_index(start); start < end; i++) {
+-              switch (CONFIG_PGTABLE_LEVELS) {
+-              case 4:
+-                      pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) |
+-                                      _KERNPG_TABLE);
+-                      break;
+-              case 5:
+-                      pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) |
+-                                      _KERNPG_TABLE);
+-                      break;
+-              default:
+-                      BUILD_BUG();
+-              }
+-              start += PGDIR_SIZE;
+-      }
++      pgd += pgd_index(addr);
++      do {
++              next = pgd_addr_end(addr, end);
++              kasan_early_p4d_populate(pgd, addr, next);
++      } while (pgd++, addr = next, addr != end);
+ }
+ 
+ #ifdef CONFIG_KASAN_INLINE
+@@ -101,7 +137,7 @@ void __init kasan_early_init(void)
+       for (i = 0; i < PTRS_PER_PUD; i++)
+               kasan_zero_pud[i] = __pud(pud_val);
+ 
+-      for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++)
++      for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
+               kasan_zero_p4d[i] = __p4d(p4d_val);
+ 
+       kasan_map_early_shadow(early_top_pgt);
+@@ -117,12 +153,35 @@ void __init kasan_init(void)
+ #endif
+ 
+       memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
++
++      /*
++       * We use the same shadow offset for 4- and 5-level paging to
++       * facilitate boot-time switching between paging modes.
++       * As a result, in 5-level paging mode, KASAN_SHADOW_START and
++       * KASAN_SHADOW_END are not aligned to PGD boundary.
++       *
++       * KASAN_SHADOW_START doesn't share PGD with anything else.
++       * We claim whole PGD entry to make things easier.
++       *
++       * KASAN_SHADOW_END lands in the last PGD entry and it collides with
++       * a bunch of things like kernel code, modules, EFI mapping, etc.
++       * We need to take extra steps to not overwrite them.
++       */
++      if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
++              void *ptr;
++
++              ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
++              memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
++              set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
++                              __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
++      }
++
+       load_cr3(early_top_pgt);
+       __flush_tlb_all();
+ 
+-      clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
++      clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);
+ 
+-      kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
++      kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
+                       kasan_mem_to_shadow((void *)PAGE_OFFSET));
+ 
+       for (i = 0; i < E820_MAX_ENTRIES; i++) {
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index bf9f03740c30..67d07802ae95 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -300,7 +300,6 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+ config KASAN_SHADOW_OFFSET
+       hex
+       depends on KASAN
+-      default 0xdff8000000000000 if X86_5LEVEL
+       default 0xdffffc0000000000
+ 
+ config HAVE_INTEL_TXT
+-- 
+2.14.2
+
diff --git a/patches/kernel/0078-x86-xen-Provide-pre-built-page-tables-only-for-CONFI.patch b/patches/kernel/0078-x86-xen-Provide-pre-built-page-tables-only-for-CONFI.patch

deleted file mode 100644 (file)

index e7675b6..0000000
--- a/patches/kernel/0078-x86-xen-Provide-pre-built-page-tables-only-for-CONFI.patch
+++ /dev/null
@@ -1,80 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
-Date: Fri, 29 Sep 2017 17:08:19 +0300
-Subject: [PATCH] x86/xen: Provide pre-built page tables only for
- CONFIG_XEN_PV=y and CONFIG_XEN_PVH=y
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Looks like we only need pre-built page tables in the CONFIG_XEN_PV=y and
-CONFIG_XEN_PVH=y cases.
-
-Let's not provide them for other configurations.
-
-Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
-Reviewed-by: Juergen Gross <jgross@suse.com>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Andy Lutomirski <luto@amacapital.net>
-Cc: Borislav Petkov <bp@suse.de>
-Cc: Cyrill Gorcunov <gorcunov@openvz.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-Link: http://lkml.kernel.org/r/20170929140821.37654-5-kirill.shutemov@linux.intel.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 4375c29985f155d7eb2346615d84e62d1b673682)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit a883ee7f3c1dc64a8c946543ac598399353d1b03)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/head_64.S | 11 ++++++-----
- 1 file changed, 6 insertions(+), 5 deletions(-)
-
-diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
-index d081bc7a027d..12daaa0b187f 100644
---- a/arch/x86/kernel/head_64.S
-+++ b/arch/x86/kernel/head_64.S
-@@ -37,11 +37,12 @@
-  *
-  */
- 
--#define p4d_index(x)  (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
- #define pud_index(x)  (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
- 
-+#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
- PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
- PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
-+#endif
- L3_START_KERNEL = pud_index(__START_KERNEL_map)
- 
-       .text
-@@ -348,10 +349,7 @@ NEXT_PAGE(early_dynamic_pgts)
- 
-       .data
- 
--#ifndef CONFIG_XEN
--NEXT_PAGE(init_top_pgt)
--      .fill   512,8,0
--#else
-+#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
- NEXT_PAGE(init_top_pgt)
-       .quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
-       .org    init_top_pgt + PGD_PAGE_OFFSET*8, 0
-@@ -368,6 +366,9 @@ NEXT_PAGE(level2_ident_pgt)
-        * Don't set NX because code runs from these pages.
-        */
-       PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
-+#else
-+NEXT_PAGE(init_top_pgt)
-+      .fill   512,8,0
- #endif
- 
- #ifdef CONFIG_X86_5LEVEL
--- 
-2.14.2
-
diff --git a/patches/kernel/0079-x86-xen-Drop-5-level-paging-support-code-from-the-XE.patch b/patches/kernel/0079-x86-xen-Drop-5-level-paging-support-code-from-the-XE.patch

deleted file mode 100644 (file)

index 7073e79..0000000
--- a/patches/kernel/0079-x86-xen-Drop-5-level-paging-support-code-from-the-XE.patch
+++ /dev/null
@@ -1,316 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
-Date: Fri, 29 Sep 2017 17:08:20 +0300
-Subject: [PATCH] x86/xen: Drop 5-level paging support code from the XEN_PV
- code
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-It was decided 5-level paging is not going to be supported in XEN_PV.
-
-Let's drop the dead code from the XEN_PV code.
-
-Tested-by: Juergen Gross <jgross@suse.com>
-Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
-Reviewed-by: Juergen Gross <jgross@suse.com>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Andy Lutomirski <luto@amacapital.net>
-Cc: Borislav Petkov <bp@suse.de>
-Cc: Cyrill Gorcunov <gorcunov@openvz.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-Link: http://lkml.kernel.org/r/20170929140821.37654-6-kirill.shutemov@linux.intel.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 773dd2fca581b0a80e5a33332cc8ee67e5a79cba)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 3fd0b7ef0094fd8bb3c8172d9b137ebe0d81ecbc)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/xen/mmu_pv.c | 159 +++++++++++++++++++-------------------------------
- 1 file changed, 60 insertions(+), 99 deletions(-)
-
-diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
-index ba76f3ce997f..45bb2d462e44 100644
---- a/arch/x86/xen/mmu_pv.c
-+++ b/arch/x86/xen/mmu_pv.c
-@@ -469,7 +469,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
- }
- PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
- 
--#if CONFIG_PGTABLE_LEVELS == 4
-+#ifdef CONFIG_X86_64
- __visible pudval_t xen_pud_val(pud_t pud)
- {
-       return pte_mfn_to_pfn(pud.pud);
-@@ -558,7 +558,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)
- 
-       xen_mc_issue(PARAVIRT_LAZY_MMU);
- }
--#endif        /* CONFIG_PGTABLE_LEVELS == 4 */
-+#endif        /* CONFIG_X86_64 */
- 
- static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
-               int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
-@@ -600,21 +600,17 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
-               int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
-               bool last, unsigned long limit)
- {
--      int i, nr, flush = 0;
-+      int flush = 0;
-+      pud_t *pud;
- 
--      nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
--      for (i = 0; i < nr; i++) {
--              pud_t *pud;
- 
--              if (p4d_none(p4d[i]))
--                      continue;
-+      if (p4d_none(*p4d))
-+              return flush;
- 
--              pud = pud_offset(&p4d[i], 0);
--              if (PTRS_PER_PUD > 1)
--                      flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
--              flush |= xen_pud_walk(mm, pud, func,
--                              last && i == nr - 1, limit);
--      }
-+      pud = pud_offset(p4d, 0);
-+      if (PTRS_PER_PUD > 1)
-+              flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
-+      flush |= xen_pud_walk(mm, pud, func, last, limit);
-       return flush;
- }
- 
-@@ -664,8 +660,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
-                       continue;
- 
-               p4d = p4d_offset(&pgd[i], 0);
--              if (PTRS_PER_P4D > 1)
--                      flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
-               flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
-       }
- 
-@@ -1196,22 +1190,14 @@ static void __init xen_cleanmfnmap(unsigned long vaddr)
- {
-       pgd_t *pgd;
-       p4d_t *p4d;
--      unsigned int i;
-       bool unpin;
- 
-       unpin = (vaddr == 2 * PGDIR_SIZE);
-       vaddr &= PMD_MASK;
-       pgd = pgd_offset_k(vaddr);
-       p4d = p4d_offset(pgd, 0);
--      for (i = 0; i < PTRS_PER_P4D; i++) {
--              if (p4d_none(p4d[i]))
--                      continue;
--              xen_cleanmfnmap_p4d(p4d + i, unpin);
--      }
--      if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
--              set_pgd(pgd, __pgd(0));
--              xen_cleanmfnmap_free_pgtbl(p4d, unpin);
--      }
-+      if (!p4d_none(*p4d))
-+              xen_cleanmfnmap_p4d(p4d, unpin);
- }
- 
- static void __init xen_pagetable_p2m_free(void)
-@@ -1717,7 +1703,7 @@ static void xen_release_pmd(unsigned long pfn)
-       xen_release_ptpage(pfn, PT_PMD);
- }
- 
--#if CONFIG_PGTABLE_LEVELS >= 4
-+#ifdef CONFIG_X86_64
- static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
- {
-       xen_alloc_ptpage(mm, pfn, PT_PUD);
-@@ -2054,13 +2040,12 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
-  */
- void __init xen_relocate_p2m(void)
- {
--      phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys;
-+      phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
-       unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
--      int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d;
-+      int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
-       pte_t *pt;
-       pmd_t *pmd;
-       pud_t *pud;
--      p4d_t *p4d = NULL;
-       pgd_t *pgd;
-       unsigned long *new_p2m;
-       int save_pud;
-@@ -2070,11 +2055,7 @@ void __init xen_relocate_p2m(void)
-       n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
-       n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
-       n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
--      if (PTRS_PER_P4D > 1)
--              n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
--      else
--              n_p4d = 0;
--      n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
-+      n_frames = n_pte + n_pt + n_pmd + n_pud;
- 
-       new_area = xen_find_free_area(PFN_PHYS(n_frames));
-       if (!new_area) {
-@@ -2090,76 +2071,56 @@ void __init xen_relocate_p2m(void)
-        * To avoid any possible virtual address collision, just use
-        * 2 * PUD_SIZE for the new area.
-        */
--      p4d_phys = new_area;
--      pud_phys = p4d_phys + PFN_PHYS(n_p4d);
-+      pud_phys = new_area;
-       pmd_phys = pud_phys + PFN_PHYS(n_pud);
-       pt_phys = pmd_phys + PFN_PHYS(n_pmd);
-       p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
- 
-       pgd = __va(read_cr3_pa());
-       new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
--      idx_p4d = 0;
-       save_pud = n_pud;
--      do {
--              if (n_p4d > 0) {
--                      p4d = early_memremap(p4d_phys, PAGE_SIZE);
--                      clear_page(p4d);
--                      n_pud = min(save_pud, PTRS_PER_P4D);
--              }
--              for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
--                      pud = early_memremap(pud_phys, PAGE_SIZE);
--                      clear_page(pud);
--                      for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
--                               idx_pmd++) {
--                              pmd = early_memremap(pmd_phys, PAGE_SIZE);
--                              clear_page(pmd);
--                              for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
--                                       idx_pt++) {
--                                      pt = early_memremap(pt_phys, PAGE_SIZE);
--                                      clear_page(pt);
--                                      for (idx_pte = 0;
--                                               idx_pte < min(n_pte, PTRS_PER_PTE);
--                                               idx_pte++) {
--                                              set_pte(pt + idx_pte,
--                                                              pfn_pte(p2m_pfn, PAGE_KERNEL));
--                                              p2m_pfn++;
--                                      }
--                                      n_pte -= PTRS_PER_PTE;
--                                      early_memunmap(pt, PAGE_SIZE);
--                                      make_lowmem_page_readonly(__va(pt_phys));
--                                      pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
--                                                      PFN_DOWN(pt_phys));
--                                      set_pmd(pmd + idx_pt,
--                                                      __pmd(_PAGE_TABLE | pt_phys));
--                                      pt_phys += PAGE_SIZE;
-+      for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
-+              pud = early_memremap(pud_phys, PAGE_SIZE);
-+              clear_page(pud);
-+              for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
-+                              idx_pmd++) {
-+                      pmd = early_memremap(pmd_phys, PAGE_SIZE);
-+                      clear_page(pmd);
-+                      for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
-+                                      idx_pt++) {
-+                              pt = early_memremap(pt_phys, PAGE_SIZE);
-+                              clear_page(pt);
-+                              for (idx_pte = 0;
-+                                              idx_pte < min(n_pte, PTRS_PER_PTE);
-+                                              idx_pte++) {
-+                                      set_pte(pt + idx_pte,
-+                                                      pfn_pte(p2m_pfn, PAGE_KERNEL));
-+                                      p2m_pfn++;
-                               }
--                              n_pt -= PTRS_PER_PMD;
--                              early_memunmap(pmd, PAGE_SIZE);
--                              make_lowmem_page_readonly(__va(pmd_phys));
--                              pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
--                                              PFN_DOWN(pmd_phys));
--                              set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
--                              pmd_phys += PAGE_SIZE;
-+                              n_pte -= PTRS_PER_PTE;
-+                              early_memunmap(pt, PAGE_SIZE);
-+                              make_lowmem_page_readonly(__va(pt_phys));
-+                              pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
-+                                              PFN_DOWN(pt_phys));
-+                              set_pmd(pmd + idx_pt,
-+                                              __pmd(_PAGE_TABLE | pt_phys));
-+                              pt_phys += PAGE_SIZE;
-                       }
--                      n_pmd -= PTRS_PER_PUD;
--                      early_memunmap(pud, PAGE_SIZE);
--                      make_lowmem_page_readonly(__va(pud_phys));
--                      pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
--                      if (n_p4d > 0)
--                              set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys));
--                      else
--                              set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
--                      pud_phys += PAGE_SIZE;
--              }
--              if (n_p4d > 0) {
--                      save_pud -= PTRS_PER_P4D;
--                      early_memunmap(p4d, PAGE_SIZE);
--                      make_lowmem_page_readonly(__va(p4d_phys));
--                      pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
--                      set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
--                      p4d_phys += PAGE_SIZE;
-+                      n_pt -= PTRS_PER_PMD;
-+                      early_memunmap(pmd, PAGE_SIZE);
-+                      make_lowmem_page_readonly(__va(pmd_phys));
-+                      pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
-+                                      PFN_DOWN(pmd_phys));
-+                      set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
-+                      pmd_phys += PAGE_SIZE;
-               }
--      } while (++idx_p4d < n_p4d);
-+              n_pmd -= PTRS_PER_PUD;
-+              early_memunmap(pud, PAGE_SIZE);
-+              make_lowmem_page_readonly(__va(pud_phys));
-+              pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
-+              set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
-+              pud_phys += PAGE_SIZE;
-+      }
- 
-       /* Now copy the old p2m info to the new area. */
-       memcpy(new_p2m, xen_p2m_addr, size);
-@@ -2386,7 +2347,7 @@ static void __init xen_post_allocator_init(void)
-       pv_mmu_ops.set_pte = xen_set_pte;
-       pv_mmu_ops.set_pmd = xen_set_pmd;
-       pv_mmu_ops.set_pud = xen_set_pud;
--#if CONFIG_PGTABLE_LEVELS >= 4
-+#ifdef CONFIG_X86_64
-       pv_mmu_ops.set_p4d = xen_set_p4d;
- #endif
- 
-@@ -2396,7 +2357,7 @@ static void __init xen_post_allocator_init(void)
-       pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
-       pv_mmu_ops.release_pte = xen_release_pte;
-       pv_mmu_ops.release_pmd = xen_release_pmd;
--#if CONFIG_PGTABLE_LEVELS >= 4
-+#ifdef CONFIG_X86_64
-       pv_mmu_ops.alloc_pud = xen_alloc_pud;
-       pv_mmu_ops.release_pud = xen_release_pud;
- #endif
-@@ -2460,14 +2421,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
-       .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
-       .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
- 
--#if CONFIG_PGTABLE_LEVELS >= 4
-+#ifdef CONFIG_X86_64
-       .pud_val = PV_CALLEE_SAVE(xen_pud_val),
-       .make_pud = PV_CALLEE_SAVE(xen_make_pud),
-       .set_p4d = xen_set_p4d_hyper,
- 
-       .alloc_pud = xen_alloc_pmd_init,
-       .release_pud = xen_release_pmd_init,
--#endif        /* CONFIG_PGTABLE_LEVELS == 4 */
-+#endif        /* CONFIG_X86_64 */
- 
-       .activate_mm = xen_activate_mm,
-       .dup_mmap = xen_dup_mmap,
--- 
-2.14.2
-
diff --git a/patches/kernel/0079-x86-xen-Provide-pre-built-page-tables-only-for-CONFI.patch b/patches/kernel/0079-x86-xen-Provide-pre-built-page-tables-only-for-CONFI.patch

new file mode 100644 (file)

index 0000000..e7675b6
--- /dev/null
+++ b/patches/kernel/0079-x86-xen-Provide-pre-built-page-tables-only-for-CONFI.patch
@@ -0,0 +1,80 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Fri, 29 Sep 2017 17:08:19 +0300
+Subject: [PATCH] x86/xen: Provide pre-built page tables only for
+ CONFIG_XEN_PV=y and CONFIG_XEN_PVH=y
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Looks like we only need pre-built page tables in the CONFIG_XEN_PV=y and
+CONFIG_XEN_PVH=y cases.
+
+Let's not provide them for other configurations.
+
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/20170929140821.37654-5-kirill.shutemov@linux.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 4375c29985f155d7eb2346615d84e62d1b673682)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit a883ee7f3c1dc64a8c946543ac598399353d1b03)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/head_64.S | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
+index d081bc7a027d..12daaa0b187f 100644
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -37,11 +37,12 @@
+  *
+  */
+ 
+-#define p4d_index(x)  (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
+ #define pud_index(x)  (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+ 
++#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
+ PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
+ PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
++#endif
+ L3_START_KERNEL = pud_index(__START_KERNEL_map)
+ 
+       .text
+@@ -348,10 +349,7 @@ NEXT_PAGE(early_dynamic_pgts)
+ 
+       .data
+ 
+-#ifndef CONFIG_XEN
+-NEXT_PAGE(init_top_pgt)
+-      .fill   512,8,0
+-#else
++#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
+ NEXT_PAGE(init_top_pgt)
+       .quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+       .org    init_top_pgt + PGD_PAGE_OFFSET*8, 0
+@@ -368,6 +366,9 @@ NEXT_PAGE(level2_ident_pgt)
+        * Don't set NX because code runs from these pages.
+        */
+       PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
++#else
++NEXT_PAGE(init_top_pgt)
++      .fill   512,8,0
+ #endif
+ 
+ #ifdef CONFIG_X86_5LEVEL
+-- 
+2.14.2
+
diff --git a/patches/kernel/0080-ACPI-APEI-remove-the-unused-dead-code-for-SEA-NMI-no.patch b/patches/kernel/0080-ACPI-APEI-remove-the-unused-dead-code-for-SEA-NMI-no.patch

deleted file mode 100644 (file)

index 209308b..0000000
--- a/patches/kernel/0080-ACPI-APEI-remove-the-unused-dead-code-for-SEA-NMI-no.patch
+++ /dev/null
@@ -1,88 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dongjiu Geng <gengdongjiu@huawei.com>
-Date: Tue, 17 Oct 2017 16:02:20 +0800
-Subject: [PATCH] ACPI / APEI: remove the unused dead-code for SEA/NMI
- notification type
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-For the SEA notification, the two functions ghes_sea_add() and
-ghes_sea_remove() are only called when CONFIG_ACPI_APEI_SEA
-is defined. If not, it will return errors in the ghes_probe()
-and not continue. If the probe is failed, the ghes_sea_remove()
-also has no chance to be called. Hence, remove the unnecessary
-handling when CONFIG_ACPI_APEI_SEA is not defined.
-
-For the NMI notification, it has the same issue as SEA notification,
-so also remove the unused dead-code for it.
-
-Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
-Tested-by: Tyler Baicar <tbaicar@codeaurora.org>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-(cherry picked from commit c49870e89f4d2c21c76ebe90568246bb0f3572b7)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 55f73c32ba6438e8886f348722d2b25aef129d40)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- drivers/acpi/apei/ghes.c | 33 +++++----------------------------
- 1 file changed, 5 insertions(+), 28 deletions(-)
-
-diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
-index 3628078ee351..4827176f838d 100644
---- a/drivers/acpi/apei/ghes.c
-+++ b/drivers/acpi/apei/ghes.c
-@@ -850,17 +850,8 @@ static void ghes_sea_remove(struct ghes *ghes)
-       synchronize_rcu();
- }
- #else /* CONFIG_ACPI_APEI_SEA */
--static inline void ghes_sea_add(struct ghes *ghes)
--{
--      pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n",
--             ghes->generic->header.source_id);
--}
--
--static inline void ghes_sea_remove(struct ghes *ghes)
--{
--      pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n",
--             ghes->generic->header.source_id);
--}
-+static inline void ghes_sea_add(struct ghes *ghes) { }
-+static inline void ghes_sea_remove(struct ghes *ghes) { }
- #endif /* CONFIG_ACPI_APEI_SEA */
- 
- #ifdef CONFIG_HAVE_ACPI_APEI_NMI
-@@ -1062,23 +1053,9 @@ static void ghes_nmi_init_cxt(void)
-       init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
- }
- #else /* CONFIG_HAVE_ACPI_APEI_NMI */
--static inline void ghes_nmi_add(struct ghes *ghes)
--{
--      pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n",
--             ghes->generic->header.source_id);
--      BUG();
--}
--
--static inline void ghes_nmi_remove(struct ghes *ghes)
--{
--      pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
--             ghes->generic->header.source_id);
--      BUG();
--}
--
--static inline void ghes_nmi_init_cxt(void)
--{
--}
-+static inline void ghes_nmi_add(struct ghes *ghes) { }
-+static inline void ghes_nmi_remove(struct ghes *ghes) { }
-+static inline void ghes_nmi_init_cxt(void) { }
- #endif /* CONFIG_HAVE_ACPI_APEI_NMI */
- 
- static int ghes_probe(struct platform_device *ghes_dev)
--- 
-2.14.2
-
diff --git a/patches/kernel/0080-x86-xen-Drop-5-level-paging-support-code-from-the-XE.patch b/patches/kernel/0080-x86-xen-Drop-5-level-paging-support-code-from-the-XE.patch

new file mode 100644 (file)

index 0000000..7073e79
--- /dev/null
+++ b/patches/kernel/0080-x86-xen-Drop-5-level-paging-support-code-from-the-XE.patch
@@ -0,0 +1,316 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Fri, 29 Sep 2017 17:08:20 +0300
+Subject: [PATCH] x86/xen: Drop 5-level paging support code from the XEN_PV
+ code
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+It was decided 5-level paging is not going to be supported in XEN_PV.
+
+Let's drop the dead code from the XEN_PV code.
+
+Tested-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/20170929140821.37654-6-kirill.shutemov@linux.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 773dd2fca581b0a80e5a33332cc8ee67e5a79cba)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 3fd0b7ef0094fd8bb3c8172d9b137ebe0d81ecbc)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/xen/mmu_pv.c | 159 +++++++++++++++++++-------------------------------
+ 1 file changed, 60 insertions(+), 99 deletions(-)
+
+diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
+index ba76f3ce997f..45bb2d462e44 100644
+--- a/arch/x86/xen/mmu_pv.c
++++ b/arch/x86/xen/mmu_pv.c
+@@ -469,7 +469,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
+ }
+ PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
+ 
+-#if CONFIG_PGTABLE_LEVELS == 4
++#ifdef CONFIG_X86_64
+ __visible pudval_t xen_pud_val(pud_t pud)
+ {
+       return pte_mfn_to_pfn(pud.pud);
+@@ -558,7 +558,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)
+ 
+       xen_mc_issue(PARAVIRT_LAZY_MMU);
+ }
+-#endif        /* CONFIG_PGTABLE_LEVELS == 4 */
++#endif        /* CONFIG_X86_64 */
+ 
+ static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
+               int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+@@ -600,21 +600,17 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
+               int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+               bool last, unsigned long limit)
+ {
+-      int i, nr, flush = 0;
++      int flush = 0;
++      pud_t *pud;
+ 
+-      nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
+-      for (i = 0; i < nr; i++) {
+-              pud_t *pud;
+ 
+-              if (p4d_none(p4d[i]))
+-                      continue;
++      if (p4d_none(*p4d))
++              return flush;
+ 
+-              pud = pud_offset(&p4d[i], 0);
+-              if (PTRS_PER_PUD > 1)
+-                      flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
+-              flush |= xen_pud_walk(mm, pud, func,
+-                              last && i == nr - 1, limit);
+-      }
++      pud = pud_offset(p4d, 0);
++      if (PTRS_PER_PUD > 1)
++              flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
++      flush |= xen_pud_walk(mm, pud, func, last, limit);
+       return flush;
+ }
+ 
+@@ -664,8 +660,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
+                       continue;
+ 
+               p4d = p4d_offset(&pgd[i], 0);
+-              if (PTRS_PER_P4D > 1)
+-                      flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
+               flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
+       }
+ 
+@@ -1196,22 +1190,14 @@ static void __init xen_cleanmfnmap(unsigned long vaddr)
+ {
+       pgd_t *pgd;
+       p4d_t *p4d;
+-      unsigned int i;
+       bool unpin;
+ 
+       unpin = (vaddr == 2 * PGDIR_SIZE);
+       vaddr &= PMD_MASK;
+       pgd = pgd_offset_k(vaddr);
+       p4d = p4d_offset(pgd, 0);
+-      for (i = 0; i < PTRS_PER_P4D; i++) {
+-              if (p4d_none(p4d[i]))
+-                      continue;
+-              xen_cleanmfnmap_p4d(p4d + i, unpin);
+-      }
+-      if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+-              set_pgd(pgd, __pgd(0));
+-              xen_cleanmfnmap_free_pgtbl(p4d, unpin);
+-      }
++      if (!p4d_none(*p4d))
++              xen_cleanmfnmap_p4d(p4d, unpin);
+ }
+ 
+ static void __init xen_pagetable_p2m_free(void)
+@@ -1717,7 +1703,7 @@ static void xen_release_pmd(unsigned long pfn)
+       xen_release_ptpage(pfn, PT_PMD);
+ }
+ 
+-#if CONFIG_PGTABLE_LEVELS >= 4
++#ifdef CONFIG_X86_64
+ static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
+ {
+       xen_alloc_ptpage(mm, pfn, PT_PUD);
+@@ -2054,13 +2040,12 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
+  */
+ void __init xen_relocate_p2m(void)
+ {
+-      phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys;
++      phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
+       unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
+-      int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d;
++      int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
+       pte_t *pt;
+       pmd_t *pmd;
+       pud_t *pud;
+-      p4d_t *p4d = NULL;
+       pgd_t *pgd;
+       unsigned long *new_p2m;
+       int save_pud;
+@@ -2070,11 +2055,7 @@ void __init xen_relocate_p2m(void)
+       n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
+       n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
+       n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
+-      if (PTRS_PER_P4D > 1)
+-              n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
+-      else
+-              n_p4d = 0;
+-      n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
++      n_frames = n_pte + n_pt + n_pmd + n_pud;
+ 
+       new_area = xen_find_free_area(PFN_PHYS(n_frames));
+       if (!new_area) {
+@@ -2090,76 +2071,56 @@ void __init xen_relocate_p2m(void)
+        * To avoid any possible virtual address collision, just use
+        * 2 * PUD_SIZE for the new area.
+        */
+-      p4d_phys = new_area;
+-      pud_phys = p4d_phys + PFN_PHYS(n_p4d);
++      pud_phys = new_area;
+       pmd_phys = pud_phys + PFN_PHYS(n_pud);
+       pt_phys = pmd_phys + PFN_PHYS(n_pmd);
+       p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
+ 
+       pgd = __va(read_cr3_pa());
+       new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
+-      idx_p4d = 0;
+       save_pud = n_pud;
+-      do {
+-              if (n_p4d > 0) {
+-                      p4d = early_memremap(p4d_phys, PAGE_SIZE);
+-                      clear_page(p4d);
+-                      n_pud = min(save_pud, PTRS_PER_P4D);
+-              }
+-              for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
+-                      pud = early_memremap(pud_phys, PAGE_SIZE);
+-                      clear_page(pud);
+-                      for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
+-                               idx_pmd++) {
+-                              pmd = early_memremap(pmd_phys, PAGE_SIZE);
+-                              clear_page(pmd);
+-                              for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
+-                                       idx_pt++) {
+-                                      pt = early_memremap(pt_phys, PAGE_SIZE);
+-                                      clear_page(pt);
+-                                      for (idx_pte = 0;
+-                                               idx_pte < min(n_pte, PTRS_PER_PTE);
+-                                               idx_pte++) {
+-                                              set_pte(pt + idx_pte,
+-                                                              pfn_pte(p2m_pfn, PAGE_KERNEL));
+-                                              p2m_pfn++;
+-                                      }
+-                                      n_pte -= PTRS_PER_PTE;
+-                                      early_memunmap(pt, PAGE_SIZE);
+-                                      make_lowmem_page_readonly(__va(pt_phys));
+-                                      pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
+-                                                      PFN_DOWN(pt_phys));
+-                                      set_pmd(pmd + idx_pt,
+-                                                      __pmd(_PAGE_TABLE | pt_phys));
+-                                      pt_phys += PAGE_SIZE;
++      for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
++              pud = early_memremap(pud_phys, PAGE_SIZE);
++              clear_page(pud);
++              for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
++                              idx_pmd++) {
++                      pmd = early_memremap(pmd_phys, PAGE_SIZE);
++                      clear_page(pmd);
++                      for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
++                                      idx_pt++) {
++                              pt = early_memremap(pt_phys, PAGE_SIZE);
++                              clear_page(pt);
++                              for (idx_pte = 0;
++                                              idx_pte < min(n_pte, PTRS_PER_PTE);
++                                              idx_pte++) {
++                                      set_pte(pt + idx_pte,
++                                                      pfn_pte(p2m_pfn, PAGE_KERNEL));
++                                      p2m_pfn++;
+                               }
+-                              n_pt -= PTRS_PER_PMD;
+-                              early_memunmap(pmd, PAGE_SIZE);
+-                              make_lowmem_page_readonly(__va(pmd_phys));
+-                              pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
+-                                              PFN_DOWN(pmd_phys));
+-                              set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
+-                              pmd_phys += PAGE_SIZE;
++                              n_pte -= PTRS_PER_PTE;
++                              early_memunmap(pt, PAGE_SIZE);
++                              make_lowmem_page_readonly(__va(pt_phys));
++                              pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
++                                              PFN_DOWN(pt_phys));
++                              set_pmd(pmd + idx_pt,
++                                              __pmd(_PAGE_TABLE | pt_phys));
++                              pt_phys += PAGE_SIZE;
+                       }
+-                      n_pmd -= PTRS_PER_PUD;
+-                      early_memunmap(pud, PAGE_SIZE);
+-                      make_lowmem_page_readonly(__va(pud_phys));
+-                      pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
+-                      if (n_p4d > 0)
+-                              set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys));
+-                      else
+-                              set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
+-                      pud_phys += PAGE_SIZE;
+-              }
+-              if (n_p4d > 0) {
+-                      save_pud -= PTRS_PER_P4D;
+-                      early_memunmap(p4d, PAGE_SIZE);
+-                      make_lowmem_page_readonly(__va(p4d_phys));
+-                      pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
+-                      set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
+-                      p4d_phys += PAGE_SIZE;
++                      n_pt -= PTRS_PER_PMD;
++                      early_memunmap(pmd, PAGE_SIZE);
++                      make_lowmem_page_readonly(__va(pmd_phys));
++                      pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
++                                      PFN_DOWN(pmd_phys));
++                      set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
++                      pmd_phys += PAGE_SIZE;
+               }
+-      } while (++idx_p4d < n_p4d);
++              n_pmd -= PTRS_PER_PUD;
++              early_memunmap(pud, PAGE_SIZE);
++              make_lowmem_page_readonly(__va(pud_phys));
++              pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
++              set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
++              pud_phys += PAGE_SIZE;
++      }
+ 
+       /* Now copy the old p2m info to the new area. */
+       memcpy(new_p2m, xen_p2m_addr, size);
+@@ -2386,7 +2347,7 @@ static void __init xen_post_allocator_init(void)
+       pv_mmu_ops.set_pte = xen_set_pte;
+       pv_mmu_ops.set_pmd = xen_set_pmd;
+       pv_mmu_ops.set_pud = xen_set_pud;
+-#if CONFIG_PGTABLE_LEVELS >= 4
++#ifdef CONFIG_X86_64
+       pv_mmu_ops.set_p4d = xen_set_p4d;
+ #endif
+ 
+@@ -2396,7 +2357,7 @@ static void __init xen_post_allocator_init(void)
+       pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
+       pv_mmu_ops.release_pte = xen_release_pte;
+       pv_mmu_ops.release_pmd = xen_release_pmd;
+-#if CONFIG_PGTABLE_LEVELS >= 4
++#ifdef CONFIG_X86_64
+       pv_mmu_ops.alloc_pud = xen_alloc_pud;
+       pv_mmu_ops.release_pud = xen_release_pud;
+ #endif
+@@ -2460,14 +2421,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
+       .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
+       .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
+ 
+-#if CONFIG_PGTABLE_LEVELS >= 4
++#ifdef CONFIG_X86_64
+       .pud_val = PV_CALLEE_SAVE(xen_pud_val),
+       .make_pud = PV_CALLEE_SAVE(xen_make_pud),
+       .set_p4d = xen_set_p4d_hyper,
+ 
+       .alloc_pud = xen_alloc_pmd_init,
+       .release_pud = xen_release_pmd_init,
+-#endif        /* CONFIG_PGTABLE_LEVELS == 4 */
++#endif        /* CONFIG_X86_64 */
+ 
+       .activate_mm = xen_activate_mm,
+       .dup_mmap = xen_dup_mmap,
+-- 
+2.14.2
+
diff --git a/patches/kernel/0081-ACPI-APEI-remove-the-unused-dead-code-for-SEA-NMI-no.patch b/patches/kernel/0081-ACPI-APEI-remove-the-unused-dead-code-for-SEA-NMI-no.patch

new file mode 100644 (file)

index 0000000..209308b
--- /dev/null
+++ b/patches/kernel/0081-ACPI-APEI-remove-the-unused-dead-code-for-SEA-NMI-no.patch
@@ -0,0 +1,88 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dongjiu Geng <gengdongjiu@huawei.com>
+Date: Tue, 17 Oct 2017 16:02:20 +0800
+Subject: [PATCH] ACPI / APEI: remove the unused dead-code for SEA/NMI
+ notification type
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+For the SEA notification, the two functions ghes_sea_add() and
+ghes_sea_remove() are only called when CONFIG_ACPI_APEI_SEA
+is defined. If not, it will return errors in the ghes_probe()
+and not continue. If the probe is failed, the ghes_sea_remove()
+also has no chance to be called. Hence, remove the unnecessary
+handling when CONFIG_ACPI_APEI_SEA is not defined.
+
+For the NMI notification, it has the same issue as SEA notification,
+so also remove the unused dead-code for it.
+
+Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
+Tested-by: Tyler Baicar <tbaicar@codeaurora.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+(cherry picked from commit c49870e89f4d2c21c76ebe90568246bb0f3572b7)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 55f73c32ba6438e8886f348722d2b25aef129d40)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ drivers/acpi/apei/ghes.c | 33 +++++----------------------------
+ 1 file changed, 5 insertions(+), 28 deletions(-)
+
+diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
+index 3628078ee351..4827176f838d 100644
+--- a/drivers/acpi/apei/ghes.c
++++ b/drivers/acpi/apei/ghes.c
+@@ -850,17 +850,8 @@ static void ghes_sea_remove(struct ghes *ghes)
+       synchronize_rcu();
+ }
+ #else /* CONFIG_ACPI_APEI_SEA */
+-static inline void ghes_sea_add(struct ghes *ghes)
+-{
+-      pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n",
+-             ghes->generic->header.source_id);
+-}
+-
+-static inline void ghes_sea_remove(struct ghes *ghes)
+-{
+-      pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n",
+-             ghes->generic->header.source_id);
+-}
++static inline void ghes_sea_add(struct ghes *ghes) { }
++static inline void ghes_sea_remove(struct ghes *ghes) { }
+ #endif /* CONFIG_ACPI_APEI_SEA */
+ 
+ #ifdef CONFIG_HAVE_ACPI_APEI_NMI
+@@ -1062,23 +1053,9 @@ static void ghes_nmi_init_cxt(void)
+       init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
+ }
+ #else /* CONFIG_HAVE_ACPI_APEI_NMI */
+-static inline void ghes_nmi_add(struct ghes *ghes)
+-{
+-      pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n",
+-             ghes->generic->header.source_id);
+-      BUG();
+-}
+-
+-static inline void ghes_nmi_remove(struct ghes *ghes)
+-{
+-      pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
+-             ghes->generic->header.source_id);
+-      BUG();
+-}
+-
+-static inline void ghes_nmi_init_cxt(void)
+-{
+-}
++static inline void ghes_nmi_add(struct ghes *ghes) { }
++static inline void ghes_nmi_remove(struct ghes *ghes) { }
++static inline void ghes_nmi_init_cxt(void) { }
+ #endif /* CONFIG_HAVE_ACPI_APEI_NMI */
+ 
+ static int ghes_probe(struct platform_device *ghes_dev)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0081-x86-asm-Don-t-use-the-confusing-.ifeq-directive.patch b/patches/kernel/0081-x86-asm-Don-t-use-the-confusing-.ifeq-directive.patch

deleted file mode 100644 (file)

index dfe7a2f..0000000
--- a/patches/kernel/0081-x86-asm-Don-t-use-the-confusing-.ifeq-directive.patch
+++ /dev/null
@@ -1,78 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Fri, 20 Oct 2017 11:21:35 -0500
-Subject: [PATCH] x86/asm: Don't use the confusing '.ifeq' directive
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-I find the '.ifeq <expression>' directive to be confusing.  Reading it
-quickly seems to suggest its opposite meaning, or that it's missing an
-argument.
-
-Improve readability by replacing all of its x86 uses with
-'.if <expression> == 0'.
-
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andrei Vagin <avagin@virtuozzo.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/757da028e802c7e98d23fbab8d234b1063e161cf.1508516398.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 82c62fa0c49aa305104013cee4468772799bb391)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 981dedac1061fb47d0b04e07f6752be195d7e41a)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S | 2 +-
- arch/x86/kernel/head_32.S | 2 +-
- arch/x86/kernel/head_64.S | 2 +-
- 3 files changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 2e4fc6425f47..34adfe0221d2 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -830,7 +830,7 @@ ENTRY(\sym)
- 
-       ASM_CLAC
- 
--      .ifeq \has_error_code
-+      .if \has_error_code == 0
-       pushq   $-1                             /* ORIG_RAX: no syscall to restart */
-       .endif
- 
-diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
-index 1f85ee8f9439..337a65377baf 100644
---- a/arch/x86/kernel/head_32.S
-+++ b/arch/x86/kernel/head_32.S
-@@ -435,7 +435,7 @@ ENTRY(early_idt_handler_array)
-       # 24(%rsp) error code
-       i = 0
-       .rept NUM_EXCEPTION_VECTORS
--      .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
-+      .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
-       pushl $0                # Dummy error code, to make stack frame uniform
-       .endif
-       pushl $i                # 20(%esp) Vector number
-diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
-index 12daaa0b187f..a2d8541b1da4 100644
---- a/arch/x86/kernel/head_64.S
-+++ b/arch/x86/kernel/head_64.S
-@@ -258,7 +258,7 @@ ENDPROC(start_cpu0)
- ENTRY(early_idt_handler_array)
-       i = 0
-       .rept NUM_EXCEPTION_VECTORS
--      .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
-+      .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
-               UNWIND_HINT_IRET_REGS
-               pushq $0        # Dummy error code, to make stack frame uniform
-       .else
--- 
-2.14.2
-
diff --git a/patches/kernel/0082-x86-asm-Don-t-use-the-confusing-.ifeq-directive.patch b/patches/kernel/0082-x86-asm-Don-t-use-the-confusing-.ifeq-directive.patch

new file mode 100644 (file)

index 0000000..dfe7a2f
--- /dev/null
+++ b/patches/kernel/0082-x86-asm-Don-t-use-the-confusing-.ifeq-directive.patch
@@ -0,0 +1,78 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Fri, 20 Oct 2017 11:21:35 -0500
+Subject: [PATCH] x86/asm: Don't use the confusing '.ifeq' directive
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+I find the '.ifeq <expression>' directive to be confusing.  Reading it
+quickly seems to suggest its opposite meaning, or that it's missing an
+argument.
+
+Improve readability by replacing all of its x86 uses with
+'.if <expression> == 0'.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andrei Vagin <avagin@virtuozzo.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/757da028e802c7e98d23fbab8d234b1063e161cf.1508516398.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 82c62fa0c49aa305104013cee4468772799bb391)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 981dedac1061fb47d0b04e07f6752be195d7e41a)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S | 2 +-
+ arch/x86/kernel/head_32.S | 2 +-
+ arch/x86/kernel/head_64.S | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 2e4fc6425f47..34adfe0221d2 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -830,7 +830,7 @@ ENTRY(\sym)
+ 
+       ASM_CLAC
+ 
+-      .ifeq \has_error_code
++      .if \has_error_code == 0
+       pushq   $-1                             /* ORIG_RAX: no syscall to restart */
+       .endif
+ 
+diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
+index 1f85ee8f9439..337a65377baf 100644
+--- a/arch/x86/kernel/head_32.S
++++ b/arch/x86/kernel/head_32.S
+@@ -435,7 +435,7 @@ ENTRY(early_idt_handler_array)
+       # 24(%rsp) error code
+       i = 0
+       .rept NUM_EXCEPTION_VECTORS
+-      .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
++      .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
+       pushl $0                # Dummy error code, to make stack frame uniform
+       .endif
+       pushl $i                # 20(%esp) Vector number
+diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
+index 12daaa0b187f..a2d8541b1da4 100644
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -258,7 +258,7 @@ ENDPROC(start_cpu0)
+ ENTRY(early_idt_handler_array)
+       i = 0
+       .rept NUM_EXCEPTION_VECTORS
+-      .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
++      .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
+               UNWIND_HINT_IRET_REGS
+               pushq $0        # Dummy error code, to make stack frame uniform
+       .else
+-- 
+2.14.2
+
diff --git a/patches/kernel/0082-x86-build-Beautify-build-log-of-syscall-headers.patch b/patches/kernel/0082-x86-build-Beautify-build-log-of-syscall-headers.patch

deleted file mode 100644 (file)

index 8bd14b6..0000000
--- a/patches/kernel/0082-x86-build-Beautify-build-log-of-syscall-headers.patch
+++ /dev/null
@@ -1,62 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Masahiro Yamada <yamada.masahiro@socionext.com>
-Date: Fri, 27 Oct 2017 13:11:10 +0900
-Subject: [PATCH] x86/build: Beautify build log of syscall headers
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This makes the build log look nicer.
-
-Before:
-  SYSTBL  arch/x86/entry/syscalls/../../include/generated/asm/syscalls_32.h
-  SYSHDR  arch/x86/entry/syscalls/../../include/generated/asm/unistd_32_ia32.h
-  SYSHDR  arch/x86/entry/syscalls/../../include/generated/asm/unistd_64_x32.h
-  SYSTBL  arch/x86/entry/syscalls/../../include/generated/asm/syscalls_64.h
-  SYSHDR  arch/x86/entry/syscalls/../../include/generated/uapi/asm/unistd_32.h
-  SYSHDR  arch/x86/entry/syscalls/../../include/generated/uapi/asm/unistd_64.h
-  SYSHDR  arch/x86/entry/syscalls/../../include/generated/uapi/asm/unistd_x32.h
-
-After:
-  SYSTBL  arch/x86/include/generated/asm/syscalls_32.h
-  SYSHDR  arch/x86/include/generated/asm/unistd_32_ia32.h
-  SYSHDR  arch/x86/include/generated/asm/unistd_64_x32.h
-  SYSTBL  arch/x86/include/generated/asm/syscalls_64.h
-  SYSHDR  arch/x86/include/generated/uapi/asm/unistd_32.h
-  SYSHDR  arch/x86/include/generated/uapi/asm/unistd_64.h
-  SYSHDR  arch/x86/include/generated/uapi/asm/unistd_x32.h
-
-Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
-Acked-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: "H. Peter Anvin" <hpa@zytor.com>
-Cc: linux-kbuild@vger.kernel.org
-Link: http://lkml.kernel.org/r/1509077470-2735-1-git-send-email-yamada.masahiro@socionext.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit af8e947079a7dab0480b5d6db6b093fd04b86fc9)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d945957924e9b1a469516b4029fd384138c2cb69)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/syscalls/Makefile | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
-index 57aa59fd140c..e34c7a931994 100644
---- a/arch/x86/entry/syscalls/Makefile
-+++ b/arch/x86/entry/syscalls/Makefile
-@@ -1,5 +1,5 @@
--out := $(obj)/../../include/generated/asm
--uapi := $(obj)/../../include/generated/uapi/asm
-+out := arch/$(SRCARCH)/include/generated/asm
-+uapi := arch/$(SRCARCH)/include/generated/uapi/asm
- 
- # Create output directory if not already present
- _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
--- 
-2.14.2
-
diff --git a/patches/kernel/0083-x86-build-Beautify-build-log-of-syscall-headers.patch b/patches/kernel/0083-x86-build-Beautify-build-log-of-syscall-headers.patch

new file mode 100644 (file)

index 0000000..8bd14b6
--- /dev/null
+++ b/patches/kernel/0083-x86-build-Beautify-build-log-of-syscall-headers.patch
@@ -0,0 +1,62 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Masahiro Yamada <yamada.masahiro@socionext.com>
+Date: Fri, 27 Oct 2017 13:11:10 +0900
+Subject: [PATCH] x86/build: Beautify build log of syscall headers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This makes the build log look nicer.
+
+Before:
+  SYSTBL  arch/x86/entry/syscalls/../../include/generated/asm/syscalls_32.h
+  SYSHDR  arch/x86/entry/syscalls/../../include/generated/asm/unistd_32_ia32.h
+  SYSHDR  arch/x86/entry/syscalls/../../include/generated/asm/unistd_64_x32.h
+  SYSTBL  arch/x86/entry/syscalls/../../include/generated/asm/syscalls_64.h
+  SYSHDR  arch/x86/entry/syscalls/../../include/generated/uapi/asm/unistd_32.h
+  SYSHDR  arch/x86/entry/syscalls/../../include/generated/uapi/asm/unistd_64.h
+  SYSHDR  arch/x86/entry/syscalls/../../include/generated/uapi/asm/unistd_x32.h
+
+After:
+  SYSTBL  arch/x86/include/generated/asm/syscalls_32.h
+  SYSHDR  arch/x86/include/generated/asm/unistd_32_ia32.h
+  SYSHDR  arch/x86/include/generated/asm/unistd_64_x32.h
+  SYSTBL  arch/x86/include/generated/asm/syscalls_64.h
+  SYSHDR  arch/x86/include/generated/uapi/asm/unistd_32.h
+  SYSHDR  arch/x86/include/generated/uapi/asm/unistd_64.h
+  SYSHDR  arch/x86/include/generated/uapi/asm/unistd_x32.h
+
+Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: linux-kbuild@vger.kernel.org
+Link: http://lkml.kernel.org/r/1509077470-2735-1-git-send-email-yamada.masahiro@socionext.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit af8e947079a7dab0480b5d6db6b093fd04b86fc9)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d945957924e9b1a469516b4029fd384138c2cb69)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/syscalls/Makefile | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
+index 57aa59fd140c..e34c7a931994 100644
+--- a/arch/x86/entry/syscalls/Makefile
++++ b/arch/x86/entry/syscalls/Makefile
+@@ -1,5 +1,5 @@
+-out := $(obj)/../../include/generated/asm
+-uapi := $(obj)/../../include/generated/uapi/asm
++out := arch/$(SRCARCH)/include/generated/asm
++uapi := arch/$(SRCARCH)/include/generated/uapi/asm
+ 
+ # Create output directory if not already present
+ _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
+-- 
+2.14.2
+
diff --git a/patches/kernel/0083-x86-mm-64-Rename-the-register_page_bootmem_memmap-si.patch b/patches/kernel/0083-x86-mm-64-Rename-the-register_page_bootmem_memmap-si.patch

deleted file mode 100644 (file)

index 3b8f212..0000000
--- a/patches/kernel/0083-x86-mm-64-Rename-the-register_page_bootmem_memmap-si.patch
+++ /dev/null
@@ -1,90 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Baoquan He <bhe@redhat.com>
-Date: Sat, 28 Oct 2017 09:30:38 +0800
-Subject: [PATCH] x86/mm/64: Rename the register_page_bootmem_memmap() 'size'
- parameter to 'nr_pages'
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-register_page_bootmem_memmap()'s 3rd 'size' parameter is named
-in a somewhat misleading fashion - rename it to 'nr_pages' which
-makes the units of it much clearer.
-
-Meanwhile rename the existing local variable 'nr_pages' to
-'nr_pmd_pages', a more expressive name, to avoid conflict with
-new function parameter 'nr_pages'.
-
-(Also clean up the unnecessary parentheses in which get_order() is called.)
-
-Signed-off-by: Baoquan He <bhe@redhat.com>
-Acked-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: akpm@linux-foundation.org
-Link: http://lkml.kernel.org/r/1509154238-23250-1-git-send-email-bhe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 15670bfe19905b1dcbb63137f40d718b59d84479)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d73ad1d31ef8a44c6e5977c5123cbaa6d02e2035)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- include/linux/mm.h    |  2 +-
- arch/x86/mm/init_64.c | 10 +++++-----
- 2 files changed, 6 insertions(+), 6 deletions(-)
-
-diff --git a/include/linux/mm.h b/include/linux/mm.h
-index 07630442bbf2..97f6ca707010 100644
---- a/include/linux/mm.h
-+++ b/include/linux/mm.h
-@@ -2475,7 +2475,7 @@ void vmemmap_populate_print_last(void);
- void vmemmap_free(unsigned long start, unsigned long end);
- #endif
- void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
--                                unsigned long size);
-+                                unsigned long nr_pages);
- 
- enum mf_flags {
-       MF_COUNT_INCREASED = 1 << 0,
-diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
-index 136422d7d539..902983c8ea8c 100644
---- a/arch/x86/mm/init_64.c
-+++ b/arch/x86/mm/init_64.c
-@@ -1418,16 +1418,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
- 
- #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
- void register_page_bootmem_memmap(unsigned long section_nr,
--                                struct page *start_page, unsigned long size)
-+                                struct page *start_page, unsigned long nr_pages)
- {
-       unsigned long addr = (unsigned long)start_page;
--      unsigned long end = (unsigned long)(start_page + size);
-+      unsigned long end = (unsigned long)(start_page + nr_pages);
-       unsigned long next;
-       pgd_t *pgd;
-       p4d_t *p4d;
-       pud_t *pud;
-       pmd_t *pmd;
--      unsigned int nr_pages;
-+      unsigned int nr_pmd_pages;
-       struct page *page;
- 
-       for (; addr < end; addr = next) {
-@@ -1474,9 +1474,9 @@ void register_page_bootmem_memmap(unsigned long section_nr,
-                       if (pmd_none(*pmd))
-                               continue;
- 
--                      nr_pages = 1 << (get_order(PMD_SIZE));
-+                      nr_pmd_pages = 1 << get_order(PMD_SIZE);
-                       page = pmd_page(*pmd);
--                      while (nr_pages--)
-+                      while (nr_pmd_pages--)
-                               get_page_bootmem(section_nr, page++,
-                                                SECTION_INFO);
-               }
--- 
-2.14.2
-
diff --git a/patches/kernel/0084-x86-cpufeatures-Enable-new-SSE-AVX-AVX512-CPU-featur.patch b/patches/kernel/0084-x86-cpufeatures-Enable-new-SSE-AVX-AVX512-CPU-featur.patch

deleted file mode 100644 (file)

index bf5c981..0000000
--- a/patches/kernel/0084-x86-cpufeatures-Enable-new-SSE-AVX-AVX512-CPU-featur.patch
+++ /dev/null
@@ -1,86 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Gayatri Kammela <gayatri.kammela@intel.com>
-Date: Mon, 30 Oct 2017 18:20:29 -0700
-Subject: [PATCH] x86/cpufeatures: Enable new SSE/AVX/AVX512 CPU features
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Add a few new SSE/AVX/AVX512 instruction groups/features for enumeration
-in /proc/cpuinfo: AVX512_VBMI2, GFNI, VAES, VPCLMULQDQ, AVX512_VNNI,
-AVX512_BITALG.
-
- CPUID.(EAX=7,ECX=0):ECX[bit 6]  AVX512_VBMI2
- CPUID.(EAX=7,ECX=0):ECX[bit 8]  GFNI
- CPUID.(EAX=7,ECX=0):ECX[bit 9]  VAES
- CPUID.(EAX=7,ECX=0):ECX[bit 10] VPCLMULQDQ
- CPUID.(EAX=7,ECX=0):ECX[bit 11] AVX512_VNNI
- CPUID.(EAX=7,ECX=0):ECX[bit 12] AVX512_BITALG
-
-Detailed information of CPUID bits for these features can be found
-in the Intel Architecture Instruction Set Extensions and Future Features
-Programming Interface document (refer to Table 1-1. and Table 1-2.).
-A copy of this document is available at
-https://bugzilla.kernel.org/show_bug.cgi?id=197239
-
-Signed-off-by: Gayatri Kammela <gayatri.kammela@intel.com>
-Acked-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andi Kleen <andi.kleen@intel.com>
-Cc: Fenghua Yu <fenghua.yu@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Ravi Shankar <ravi.v.shankar@intel.com>
-Cc: Ricardo Neri <ricardo.neri@intel.com>
-Cc: Yang Zhong <yang.zhong@intel.com>
-Cc: bp@alien8.de
-Link: http://lkml.kernel.org/r/1509412829-23380-1-git-send-email-gayatri.kammela@intel.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit c128dbfa0f879f8ce7b79054037889b0b2240728)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit b29eb29c5aca4708d66fa977db40c779366636a2)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/cpufeatures.h | 6 ++++++
- arch/x86/kernel/cpu/cpuid-deps.c   | 6 ++++++
- 2 files changed, 12 insertions(+)
-
-diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
-index f4e145c4b06f..c465bd6613ed 100644
---- a/arch/x86/include/asm/cpufeatures.h
-+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -297,6 +297,12 @@
- #define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
- #define X86_FEATURE_PKU               (16*32+ 3) /* Protection Keys for Userspace */
- #define X86_FEATURE_OSPKE     (16*32+ 4) /* OS Protection Keys Enable */
-+#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
-+#define X86_FEATURE_GFNI      (16*32+ 8) /* Galois Field New Instructions */
-+#define X86_FEATURE_VAES      (16*32+ 9) /* Vector AES */
-+#define X86_FEATURE_VPCLMULQDQ        (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
-+#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */
-+#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
- #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
- #define X86_FEATURE_LA57      (16*32+16) /* 5-level page tables */
- #define X86_FEATURE_RDPID     (16*32+22) /* RDPID instruction */
-diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
-index c1d49842a411..c21f22d836ad 100644
---- a/arch/x86/kernel/cpu/cpuid-deps.c
-+++ b/arch/x86/kernel/cpu/cpuid-deps.c
-@@ -50,6 +50,12 @@ const static struct cpuid_dep cpuid_deps[] = {
-       { X86_FEATURE_AVX512BW,         X86_FEATURE_AVX512F   },
-       { X86_FEATURE_AVX512VL,         X86_FEATURE_AVX512F   },
-       { X86_FEATURE_AVX512VBMI,       X86_FEATURE_AVX512F   },
-+      { X86_FEATURE_AVX512_VBMI2,     X86_FEATURE_AVX512VL  },
-+      { X86_FEATURE_GFNI,             X86_FEATURE_AVX512VL  },
-+      { X86_FEATURE_VAES,             X86_FEATURE_AVX512VL  },
-+      { X86_FEATURE_VPCLMULQDQ,       X86_FEATURE_AVX512VL  },
-+      { X86_FEATURE_AVX512_VNNI,      X86_FEATURE_AVX512VL  },
-+      { X86_FEATURE_AVX512_BITALG,    X86_FEATURE_AVX512VL  },
-       { X86_FEATURE_AVX512_4VNNIW,    X86_FEATURE_AVX512F   },
-       { X86_FEATURE_AVX512_4FMAPS,    X86_FEATURE_AVX512F   },
-       { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F   },
--- 
-2.14.2
-
diff --git a/patches/kernel/0084-x86-mm-64-Rename-the-register_page_bootmem_memmap-si.patch b/patches/kernel/0084-x86-mm-64-Rename-the-register_page_bootmem_memmap-si.patch

new file mode 100644 (file)

index 0000000..3b8f212
--- /dev/null
+++ b/patches/kernel/0084-x86-mm-64-Rename-the-register_page_bootmem_memmap-si.patch
@@ -0,0 +1,90 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Baoquan He <bhe@redhat.com>
+Date: Sat, 28 Oct 2017 09:30:38 +0800
+Subject: [PATCH] x86/mm/64: Rename the register_page_bootmem_memmap() 'size'
+ parameter to 'nr_pages'
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+register_page_bootmem_memmap()'s 3rd 'size' parameter is named
+in a somewhat misleading fashion - rename it to 'nr_pages' which
+makes the units of it much clearer.
+
+Meanwhile rename the existing local variable 'nr_pages' to
+'nr_pmd_pages', a more expressive name, to avoid conflict with
+new function parameter 'nr_pages'.
+
+(Also clean up the unnecessary parentheses in which get_order() is called.)
+
+Signed-off-by: Baoquan He <bhe@redhat.com>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: akpm@linux-foundation.org
+Link: http://lkml.kernel.org/r/1509154238-23250-1-git-send-email-bhe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 15670bfe19905b1dcbb63137f40d718b59d84479)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d73ad1d31ef8a44c6e5977c5123cbaa6d02e2035)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ include/linux/mm.h    |  2 +-
+ arch/x86/mm/init_64.c | 10 +++++-----
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index 07630442bbf2..97f6ca707010 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -2475,7 +2475,7 @@ void vmemmap_populate_print_last(void);
+ void vmemmap_free(unsigned long start, unsigned long end);
+ #endif
+ void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
+-                                unsigned long size);
++                                unsigned long nr_pages);
+ 
+ enum mf_flags {
+       MF_COUNT_INCREASED = 1 << 0,
+diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
+index 136422d7d539..902983c8ea8c 100644
+--- a/arch/x86/mm/init_64.c
++++ b/arch/x86/mm/init_64.c
+@@ -1418,16 +1418,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+ 
+ #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
+ void register_page_bootmem_memmap(unsigned long section_nr,
+-                                struct page *start_page, unsigned long size)
++                                struct page *start_page, unsigned long nr_pages)
+ {
+       unsigned long addr = (unsigned long)start_page;
+-      unsigned long end = (unsigned long)(start_page + size);
++      unsigned long end = (unsigned long)(start_page + nr_pages);
+       unsigned long next;
+       pgd_t *pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+-      unsigned int nr_pages;
++      unsigned int nr_pmd_pages;
+       struct page *page;
+ 
+       for (; addr < end; addr = next) {
+@@ -1474,9 +1474,9 @@ void register_page_bootmem_memmap(unsigned long section_nr,
+                       if (pmd_none(*pmd))
+                               continue;
+ 
+-                      nr_pages = 1 << (get_order(PMD_SIZE));
++                      nr_pmd_pages = 1 << get_order(PMD_SIZE);
+                       page = pmd_page(*pmd);
+-                      while (nr_pages--)
++                      while (nr_pmd_pages--)
+                               get_page_bootmem(section_nr, page++,
+                                                SECTION_INFO);
+               }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0085-x86-cpufeatures-Enable-new-SSE-AVX-AVX512-CPU-featur.patch b/patches/kernel/0085-x86-cpufeatures-Enable-new-SSE-AVX-AVX512-CPU-featur.patch

new file mode 100644 (file)

index 0000000..bf5c981
--- /dev/null
+++ b/patches/kernel/0085-x86-cpufeatures-Enable-new-SSE-AVX-AVX512-CPU-featur.patch
@@ -0,0 +1,86 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Gayatri Kammela <gayatri.kammela@intel.com>
+Date: Mon, 30 Oct 2017 18:20:29 -0700
+Subject: [PATCH] x86/cpufeatures: Enable new SSE/AVX/AVX512 CPU features
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Add a few new SSE/AVX/AVX512 instruction groups/features for enumeration
+in /proc/cpuinfo: AVX512_VBMI2, GFNI, VAES, VPCLMULQDQ, AVX512_VNNI,
+AVX512_BITALG.
+
+ CPUID.(EAX=7,ECX=0):ECX[bit 6]  AVX512_VBMI2
+ CPUID.(EAX=7,ECX=0):ECX[bit 8]  GFNI
+ CPUID.(EAX=7,ECX=0):ECX[bit 9]  VAES
+ CPUID.(EAX=7,ECX=0):ECX[bit 10] VPCLMULQDQ
+ CPUID.(EAX=7,ECX=0):ECX[bit 11] AVX512_VNNI
+ CPUID.(EAX=7,ECX=0):ECX[bit 12] AVX512_BITALG
+
+Detailed information of CPUID bits for these features can be found
+in the Intel Architecture Instruction Set Extensions and Future Features
+Programming Interface document (refer to Table 1-1. and Table 1-2.).
+A copy of this document is available at
+https://bugzilla.kernel.org/show_bug.cgi?id=197239
+
+Signed-off-by: Gayatri Kammela <gayatri.kammela@intel.com>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andi Kleen <andi.kleen@intel.com>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ravi Shankar <ravi.v.shankar@intel.com>
+Cc: Ricardo Neri <ricardo.neri@intel.com>
+Cc: Yang Zhong <yang.zhong@intel.com>
+Cc: bp@alien8.de
+Link: http://lkml.kernel.org/r/1509412829-23380-1-git-send-email-gayatri.kammela@intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit c128dbfa0f879f8ce7b79054037889b0b2240728)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit b29eb29c5aca4708d66fa977db40c779366636a2)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/cpufeatures.h | 6 ++++++
+ arch/x86/kernel/cpu/cpuid-deps.c   | 6 ++++++
+ 2 files changed, 12 insertions(+)
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index f4e145c4b06f..c465bd6613ed 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -297,6 +297,12 @@
+ #define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
+ #define X86_FEATURE_PKU               (16*32+ 3) /* Protection Keys for Userspace */
+ #define X86_FEATURE_OSPKE     (16*32+ 4) /* OS Protection Keys Enable */
++#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
++#define X86_FEATURE_GFNI      (16*32+ 8) /* Galois Field New Instructions */
++#define X86_FEATURE_VAES      (16*32+ 9) /* Vector AES */
++#define X86_FEATURE_VPCLMULQDQ        (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
++#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */
++#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
+ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
+ #define X86_FEATURE_LA57      (16*32+16) /* 5-level page tables */
+ #define X86_FEATURE_RDPID     (16*32+22) /* RDPID instruction */
+diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
+index c1d49842a411..c21f22d836ad 100644
+--- a/arch/x86/kernel/cpu/cpuid-deps.c
++++ b/arch/x86/kernel/cpu/cpuid-deps.c
+@@ -50,6 +50,12 @@ const static struct cpuid_dep cpuid_deps[] = {
+       { X86_FEATURE_AVX512BW,         X86_FEATURE_AVX512F   },
+       { X86_FEATURE_AVX512VL,         X86_FEATURE_AVX512F   },
+       { X86_FEATURE_AVX512VBMI,       X86_FEATURE_AVX512F   },
++      { X86_FEATURE_AVX512_VBMI2,     X86_FEATURE_AVX512VL  },
++      { X86_FEATURE_GFNI,             X86_FEATURE_AVX512VL  },
++      { X86_FEATURE_VAES,             X86_FEATURE_AVX512VL  },
++      { X86_FEATURE_VPCLMULQDQ,       X86_FEATURE_AVX512VL  },
++      { X86_FEATURE_AVX512_VNNI,      X86_FEATURE_AVX512VL  },
++      { X86_FEATURE_AVX512_BITALG,    X86_FEATURE_AVX512VL  },
+       { X86_FEATURE_AVX512_4VNNIW,    X86_FEATURE_AVX512F   },
+       { X86_FEATURE_AVX512_4FMAPS,    X86_FEATURE_AVX512F   },
+       { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F   },
+-- 
+2.14.2
+
diff --git a/patches/kernel/0085-x86-mm-Relocate-page-fault-error-codes-to-traps.h.patch b/patches/kernel/0085-x86-mm-Relocate-page-fault-error-codes-to-traps.h.patch

deleted file mode 100644 (file)

index fad29e3..0000000
--- a/patches/kernel/0085-x86-mm-Relocate-page-fault-error-codes-to-traps.h.patch
+++ /dev/null
@@ -1,363 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
-Date: Fri, 27 Oct 2017 13:25:28 -0700
-Subject: [PATCH] x86/mm: Relocate page fault error codes to traps.h
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Up to this point, only fault.c used the definitions of the page fault error
-codes. Thus, it made sense to keep them within such file. Other portions of
-code might be interested in those definitions too. For instance, the User-
-Mode Instruction Prevention emulation code will use such definitions to
-emulate a page fault when it is unable to successfully copy the results
-of the emulated instructions to user space.
-
-While relocating the error code enumeration, the prefix X86_ is used to
-make it consistent with the rest of the definitions in traps.h. Of course,
-code using the enumeration had to be updated as well. No functional changes
-were performed.
-
-Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Reviewed-by: Andy Lutomirski <luto@kernel.org>
-Cc: "Michael S. Tsirkin" <mst@redhat.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: ricardo.neri@intel.com
-Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
-Cc: Huang Rui <ray.huang@amd.com>
-Cc: Shuah Khan <shuah@kernel.org>
-Cc: Jonathan Corbet <corbet@lwn.net>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com>
-Cc: Chris Metcalf <cmetcalf@mellanox.com>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Chen Yucong <slaoub@gmail.com>
-Cc: Vlastimil Babka <vbabka@suse.cz>
-Cc: Masami Hiramatsu <mhiramat@kernel.org>
-Cc: Paolo Bonzini <pbonzini@redhat.com>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
-Link: https://lkml.kernel.org/r/1509135945-13762-2-git-send-email-ricardo.neri-calderon@linux.intel.com
-
-(cherry picked from commit 1067f030994c69ca1fba8c607437c8895dcf8509)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit a85a07ab9111e3c78797c20b60a664dbd5db4981)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/traps.h | 18 +++++++++
- arch/x86/mm/fault.c          | 88 +++++++++++++++++---------------------------
- 2 files changed, 52 insertions(+), 54 deletions(-)
-
-diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
-index feb89dbe359d..8e5bf86f87e5 100644
---- a/arch/x86/include/asm/traps.h
-+++ b/arch/x86/include/asm/traps.h
-@@ -162,4 +162,22 @@ enum {
-       X86_TRAP_IRET = 32,     /* 32, IRET Exception */
- };
- 
-+/*
-+ * Page fault error code bits:
-+ *
-+ *   bit 0 ==  0: no page found       1: protection fault
-+ *   bit 1 ==  0: read access         1: write access
-+ *   bit 2 ==  0: kernel-mode access  1: user-mode access
-+ *   bit 3 ==                         1: use of reserved bit detected
-+ *   bit 4 ==                         1: fault was an instruction fetch
-+ *   bit 5 ==                         1: protection keys block access
-+ */
-+enum x86_pf_error_code {
-+      X86_PF_PROT     =               1 << 0,
-+      X86_PF_WRITE    =               1 << 1,
-+      X86_PF_USER     =               1 << 2,
-+      X86_PF_RSVD     =               1 << 3,
-+      X86_PF_INSTR    =               1 << 4,
-+      X86_PF_PK       =               1 << 5,
-+};
- #endif /* _ASM_X86_TRAPS_H */
-diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
-index 4ee9eb916826..d3a57e7ad311 100644
---- a/arch/x86/mm/fault.c
-+++ b/arch/x86/mm/fault.c
-@@ -28,26 +28,6 @@
- #define CREATE_TRACE_POINTS
- #include <asm/trace/exceptions.h>
- 
--/*
-- * Page fault error code bits:
-- *
-- *   bit 0 ==  0: no page found       1: protection fault
-- *   bit 1 ==  0: read access         1: write access
-- *   bit 2 ==  0: kernel-mode access  1: user-mode access
-- *   bit 3 ==                         1: use of reserved bit detected
-- *   bit 4 ==                         1: fault was an instruction fetch
-- *   bit 5 ==                         1: protection keys block access
-- */
--enum x86_pf_error_code {
--
--      PF_PROT         =               1 << 0,
--      PF_WRITE        =               1 << 1,
--      PF_USER         =               1 << 2,
--      PF_RSVD         =               1 << 3,
--      PF_INSTR        =               1 << 4,
--      PF_PK           =               1 << 5,
--};
--
- /*
-  * Returns 0 if mmiotrace is disabled, or if the fault is not
-  * handled by mmiotrace:
-@@ -149,7 +129,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
-        * If it was a exec (instruction fetch) fault on NX page, then
-        * do not ignore the fault:
-        */
--      if (error_code & PF_INSTR)
-+      if (error_code & X86_PF_INSTR)
-               return 0;
- 
-       instr = (void *)convert_ip_to_linear(current, regs);
-@@ -179,7 +159,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
-  * siginfo so userspace can discover which protection key was set
-  * on the PTE.
-  *
-- * If we get here, we know that the hardware signaled a PF_PK
-+ * If we get here, we know that the hardware signaled a X86_PF_PK
-  * fault and that there was a VMA once we got in the fault
-  * handler.  It does *not* guarantee that the VMA we find here
-  * was the one that we faulted on.
-@@ -204,7 +184,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
-       /*
-        * force_sig_info_fault() is called from a number of
-        * contexts, some of which have a VMA and some of which
--       * do not.  The PF_PK handing happens after we have a
-+       * do not.  The X86_PF_PK handing happens after we have a
-        * valid VMA, so we should never reach this without a
-        * valid VMA.
-        */
-@@ -693,7 +673,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
-       if (!oops_may_print())
-               return;
- 
--      if (error_code & PF_INSTR) {
-+      if (error_code & X86_PF_INSTR) {
-               unsigned int level;
-               pgd_t *pgd;
-               pte_t *pte;
-@@ -775,7 +755,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
-                */
-               if (current->thread.sig_on_uaccess_err && signal) {
-                       tsk->thread.trap_nr = X86_TRAP_PF;
--                      tsk->thread.error_code = error_code | PF_USER;
-+                      tsk->thread.error_code = error_code | X86_PF_USER;
-                       tsk->thread.cr2 = address;
- 
-                       /* XXX: hwpoison faults will set the wrong code. */
-@@ -894,7 +874,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
-       struct task_struct *tsk = current;
- 
-       /* User mode accesses just cause a SIGSEGV */
--      if (error_code & PF_USER) {
-+      if (error_code & X86_PF_USER) {
-               /*
-                * It's possible to have interrupts off here:
-                */
-@@ -915,7 +895,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
-                * Instruction fetch faults in the vsyscall page might need
-                * emulation.
-                */
--              if (unlikely((error_code & PF_INSTR) &&
-+              if (unlikely((error_code & X86_PF_INSTR) &&
-                            ((address & ~0xfff) == VSYSCALL_ADDR))) {
-                       if (emulate_vsyscall(regs, address))
-                               return;
-@@ -928,7 +908,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
-                * are always protection faults.
-                */
-               if (address >= TASK_SIZE_MAX)
--                      error_code |= PF_PROT;
-+                      error_code |= X86_PF_PROT;
- 
-               if (likely(show_unhandled_signals))
-                       show_signal_msg(regs, error_code, address, tsk);
-@@ -989,11 +969,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
- 
-       if (!boot_cpu_has(X86_FEATURE_OSPKE))
-               return false;
--      if (error_code & PF_PK)
-+      if (error_code & X86_PF_PK)
-               return true;
-       /* this checks permission keys on the VMA: */
--      if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
--                              (error_code & PF_INSTR), foreign))
-+      if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
-+                                     (error_code & X86_PF_INSTR), foreign))
-               return true;
-       return false;
- }
-@@ -1021,7 +1001,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
-       int code = BUS_ADRERR;
- 
-       /* Kernel mode? Handle exceptions or die: */
--      if (!(error_code & PF_USER)) {
-+      if (!(error_code & X86_PF_USER)) {
-               no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
-               return;
-       }
-@@ -1049,14 +1029,14 @@ static noinline void
- mm_fault_error(struct pt_regs *regs, unsigned long error_code,
-              unsigned long address, u32 *pkey, unsigned int fault)
- {
--      if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
-+      if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
-               no_context(regs, error_code, address, 0, 0);
-               return;
-       }
- 
-       if (fault & VM_FAULT_OOM) {
-               /* Kernel mode? Handle exceptions or die: */
--              if (!(error_code & PF_USER)) {
-+              if (!(error_code & X86_PF_USER)) {
-                       no_context(regs, error_code, address,
-                                  SIGSEGV, SEGV_MAPERR);
-                       return;
-@@ -1081,16 +1061,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
- 
- static int spurious_fault_check(unsigned long error_code, pte_t *pte)
- {
--      if ((error_code & PF_WRITE) && !pte_write(*pte))
-+      if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
-               return 0;
- 
--      if ((error_code & PF_INSTR) && !pte_exec(*pte))
-+      if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
-               return 0;
-       /*
-        * Note: We do not do lazy flushing on protection key
--       * changes, so no spurious fault will ever set PF_PK.
-+       * changes, so no spurious fault will ever set X86_PF_PK.
-        */
--      if ((error_code & PF_PK))
-+      if ((error_code & X86_PF_PK))
-               return 1;
- 
-       return 1;
-@@ -1136,8 +1116,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
-        * change, so user accesses are not expected to cause spurious
-        * faults.
-        */
--      if (error_code != (PF_WRITE | PF_PROT)
--          && error_code != (PF_INSTR | PF_PROT))
-+      if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
-+          error_code != (X86_PF_INSTR | X86_PF_PROT))
-               return 0;
- 
-       pgd = init_mm.pgd + pgd_index(address);
-@@ -1197,19 +1177,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
-        * always an unconditional error and can never result in
-        * a follow-up action to resolve the fault, like a COW.
-        */
--      if (error_code & PF_PK)
-+      if (error_code & X86_PF_PK)
-               return 1;
- 
-       /*
-        * Make sure to check the VMA so that we do not perform
--       * faults just to hit a PF_PK as soon as we fill in a
-+       * faults just to hit a X86_PF_PK as soon as we fill in a
-        * page.
-        */
--      if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
--                              (error_code & PF_INSTR), foreign))
-+      if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
-+                                     (error_code & X86_PF_INSTR), foreign))
-               return 1;
- 
--      if (error_code & PF_WRITE) {
-+      if (error_code & X86_PF_WRITE) {
-               /* write, present and write, not present: */
-               if (unlikely(!(vma->vm_flags & VM_WRITE)))
-                       return 1;
-@@ -1217,7 +1197,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
-       }
- 
-       /* read, present: */
--      if (unlikely(error_code & PF_PROT))
-+      if (unlikely(error_code & X86_PF_PROT))
-               return 1;
- 
-       /* read, not present: */
-@@ -1240,7 +1220,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
-       if (!static_cpu_has(X86_FEATURE_SMAP))
-               return false;
- 
--      if (error_code & PF_USER)
-+      if (error_code & X86_PF_USER)
-               return false;
- 
-       if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
-@@ -1293,7 +1273,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
-        * protection error (error_code & 9) == 0.
-        */
-       if (unlikely(fault_in_kernel_space(address))) {
--              if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
-+              if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
-                       if (vmalloc_fault(address) >= 0)
-                               return;
- 
-@@ -1321,7 +1301,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
-       if (unlikely(kprobes_fault(regs)))
-               return;
- 
--      if (unlikely(error_code & PF_RSVD))
-+      if (unlikely(error_code & X86_PF_RSVD))
-               pgtable_bad(regs, error_code, address);
- 
-       if (unlikely(smap_violation(error_code, regs))) {
-@@ -1347,7 +1327,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
-        */
-       if (user_mode(regs)) {
-               local_irq_enable();
--              error_code |= PF_USER;
-+              error_code |= X86_PF_USER;
-               flags |= FAULT_FLAG_USER;
-       } else {
-               if (regs->flags & X86_EFLAGS_IF)
-@@ -1356,9 +1336,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
- 
-       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
- 
--      if (error_code & PF_WRITE)
-+      if (error_code & X86_PF_WRITE)
-               flags |= FAULT_FLAG_WRITE;
--      if (error_code & PF_INSTR)
-+      if (error_code & X86_PF_INSTR)
-               flags |= FAULT_FLAG_INSTRUCTION;
- 
-       /*
-@@ -1378,7 +1358,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
-        * space check, thus avoiding the deadlock:
-        */
-       if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
--              if ((error_code & PF_USER) == 0 &&
-+              if (!(error_code & X86_PF_USER) &&
-                   !search_exception_tables(regs->ip)) {
-                       bad_area_nosemaphore(regs, error_code, address, NULL);
-                       return;
-@@ -1405,7 +1385,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
-               bad_area(regs, error_code, address);
-               return;
-       }
--      if (error_code & PF_USER) {
-+      if (error_code & X86_PF_USER) {
-               /*
-                * Accessing the stack below %sp is always a bug.
-                * The large cushion allows instructions like enter
--- 
-2.14.2
-
diff --git a/patches/kernel/0086-x86-boot-Relocate-definition-of-the-initial-state-of.patch b/patches/kernel/0086-x86-boot-Relocate-definition-of-the-initial-state-of.patch

deleted file mode 100644 (file)

index 936d6b0..0000000
--- a/patches/kernel/0086-x86-boot-Relocate-definition-of-the-initial-state-of.patch
+++ /dev/null
@@ -1,103 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
-Date: Fri, 27 Oct 2017 13:25:29 -0700
-Subject: [PATCH] x86/boot: Relocate definition of the initial state of CR0
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Both head_32.S and head_64.S utilize the same value to initialize the
-control register CR0. Also, other parts of the kernel might want to access
-this initial definition (e.g., emulation code for User-Mode Instruction
-Prevention uses this state to provide a sane dummy value for CR0 when
-emulating the smsw instruction). Thus, relocate this definition to a
-header file from which it can be conveniently accessed.
-
-Suggested-by: Borislav Petkov <bp@alien8.de>
-Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Reviewed-by: Andy Lutomirski <luto@kernel.org>
-Cc: "Michael S. Tsirkin" <mst@redhat.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: ricardo.neri@intel.com
-Cc: linux-mm@kvack.org
-Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
-Cc: Huang Rui <ray.huang@amd.com>
-Cc: Shuah Khan <shuah@kernel.org>
-Cc: linux-arch@vger.kernel.org
-Cc: Jonathan Corbet <corbet@lwn.net>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Chris Metcalf <cmetcalf@mellanox.com>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Chen Yucong <slaoub@gmail.com>
-Cc: Vlastimil Babka <vbabka@suse.cz>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Andy Lutomirski <luto@amacapital.net>
-Cc: Masami Hiramatsu <mhiramat@kernel.org>
-Cc: Paolo Bonzini <pbonzini@redhat.com>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Link: https://lkml.kernel.org/r/1509135945-13762-3-git-send-email-ricardo.neri-calderon@linux.intel.com
-
-(cherry picked from commit b0ce5b8c95c83a7b98c679b117e3d6ae6f97154b)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 27c31a88c22edab269abe17c0ac7db0351d26c5f)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/uapi/asm/processor-flags.h | 3 +++
- arch/x86/kernel/head_32.S                   | 3 ---
- arch/x86/kernel/head_64.S                   | 3 ---
- 3 files changed, 3 insertions(+), 6 deletions(-)
-
-diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
-index 185f3d10c194..39946d0a1d41 100644
---- a/arch/x86/include/uapi/asm/processor-flags.h
-+++ b/arch/x86/include/uapi/asm/processor-flags.h
-@@ -151,5 +151,8 @@
- #define CX86_ARR_BASE 0xc4
- #define CX86_RCR_BASE 0xdc
- 
-+#define CR0_STATE     (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
-+                       X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
-+                       X86_CR0_PG)
- 
- #endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
-diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
-index 337a65377baf..7bbcdb1ea31a 100644
---- a/arch/x86/kernel/head_32.S
-+++ b/arch/x86/kernel/head_32.S
-@@ -213,9 +213,6 @@ ENTRY(startup_32_smp)
- #endif
- 
- .Ldefault_entry:
--#define CR0_STATE     (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
--                       X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
--                       X86_CR0_PG)
-       movl $(CR0_STATE & ~X86_CR0_PG),%eax
-       movl %eax,%cr0
- 
-diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
-index a2d8541b1da4..4117c1e0b3d2 100644
---- a/arch/x86/kernel/head_64.S
-+++ b/arch/x86/kernel/head_64.S
-@@ -137,9 +137,6 @@ ENTRY(secondary_startup_64)
- 1:    wrmsr                           /* Make changes effective */
- 
-       /* Setup cr0 */
--#define CR0_STATE     (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
--                       X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
--                       X86_CR0_PG)
-       movl    $CR0_STATE, %eax
-       /* Make changes effective */
-       movq    %rax, %cr0
--- 
-2.14.2
-
diff --git a/patches/kernel/0086-x86-mm-Relocate-page-fault-error-codes-to-traps.h.patch b/patches/kernel/0086-x86-mm-Relocate-page-fault-error-codes-to-traps.h.patch

new file mode 100644 (file)

index 0000000..fad29e3
--- /dev/null
+++ b/patches/kernel/0086-x86-mm-Relocate-page-fault-error-codes-to-traps.h.patch
@@ -0,0 +1,363 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Fri, 27 Oct 2017 13:25:28 -0700
+Subject: [PATCH] x86/mm: Relocate page fault error codes to traps.h
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Up to this point, only fault.c used the definitions of the page fault error
+codes. Thus, it made sense to keep them within such file. Other portions of
+code might be interested in those definitions too. For instance, the User-
+Mode Instruction Prevention emulation code will use such definitions to
+emulate a page fault when it is unable to successfully copy the results
+of the emulated instructions to user space.
+
+While relocating the error code enumeration, the prefix X86_ is used to
+make it consistent with the rest of the definitions in traps.h. Of course,
+code using the enumeration had to be updated as well. No functional changes
+were performed.
+
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Andy Lutomirski <luto@kernel.org>
+Cc: "Michael S. Tsirkin" <mst@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: ricardo.neri@intel.com
+Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
+Cc: Huang Rui <ray.huang@amd.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com>
+Cc: Chris Metcalf <cmetcalf@mellanox.com>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Chen Yucong <slaoub@gmail.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Link: https://lkml.kernel.org/r/1509135945-13762-2-git-send-email-ricardo.neri-calderon@linux.intel.com
+
+(cherry picked from commit 1067f030994c69ca1fba8c607437c8895dcf8509)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit a85a07ab9111e3c78797c20b60a664dbd5db4981)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/traps.h | 18 +++++++++
+ arch/x86/mm/fault.c          | 88 +++++++++++++++++---------------------------
+ 2 files changed, 52 insertions(+), 54 deletions(-)
+
+diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
+index feb89dbe359d..8e5bf86f87e5 100644
+--- a/arch/x86/include/asm/traps.h
++++ b/arch/x86/include/asm/traps.h
+@@ -162,4 +162,22 @@ enum {
+       X86_TRAP_IRET = 32,     /* 32, IRET Exception */
+ };
+ 
++/*
++ * Page fault error code bits:
++ *
++ *   bit 0 ==  0: no page found       1: protection fault
++ *   bit 1 ==  0: read access         1: write access
++ *   bit 2 ==  0: kernel-mode access  1: user-mode access
++ *   bit 3 ==                         1: use of reserved bit detected
++ *   bit 4 ==                         1: fault was an instruction fetch
++ *   bit 5 ==                         1: protection keys block access
++ */
++enum x86_pf_error_code {
++      X86_PF_PROT     =               1 << 0,
++      X86_PF_WRITE    =               1 << 1,
++      X86_PF_USER     =               1 << 2,
++      X86_PF_RSVD     =               1 << 3,
++      X86_PF_INSTR    =               1 << 4,
++      X86_PF_PK       =               1 << 5,
++};
+ #endif /* _ASM_X86_TRAPS_H */
+diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
+index 4ee9eb916826..d3a57e7ad311 100644
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -28,26 +28,6 @@
+ #define CREATE_TRACE_POINTS
+ #include <asm/trace/exceptions.h>
+ 
+-/*
+- * Page fault error code bits:
+- *
+- *   bit 0 ==  0: no page found       1: protection fault
+- *   bit 1 ==  0: read access         1: write access
+- *   bit 2 ==  0: kernel-mode access  1: user-mode access
+- *   bit 3 ==                         1: use of reserved bit detected
+- *   bit 4 ==                         1: fault was an instruction fetch
+- *   bit 5 ==                         1: protection keys block access
+- */
+-enum x86_pf_error_code {
+-
+-      PF_PROT         =               1 << 0,
+-      PF_WRITE        =               1 << 1,
+-      PF_USER         =               1 << 2,
+-      PF_RSVD         =               1 << 3,
+-      PF_INSTR        =               1 << 4,
+-      PF_PK           =               1 << 5,
+-};
+-
+ /*
+  * Returns 0 if mmiotrace is disabled, or if the fault is not
+  * handled by mmiotrace:
+@@ -149,7 +129,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
+        * If it was a exec (instruction fetch) fault on NX page, then
+        * do not ignore the fault:
+        */
+-      if (error_code & PF_INSTR)
++      if (error_code & X86_PF_INSTR)
+               return 0;
+ 
+       instr = (void *)convert_ip_to_linear(current, regs);
+@@ -179,7 +159,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
+  * siginfo so userspace can discover which protection key was set
+  * on the PTE.
+  *
+- * If we get here, we know that the hardware signaled a PF_PK
++ * If we get here, we know that the hardware signaled a X86_PF_PK
+  * fault and that there was a VMA once we got in the fault
+  * handler.  It does *not* guarantee that the VMA we find here
+  * was the one that we faulted on.
+@@ -204,7 +184,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
+       /*
+        * force_sig_info_fault() is called from a number of
+        * contexts, some of which have a VMA and some of which
+-       * do not.  The PF_PK handing happens after we have a
++       * do not.  The X86_PF_PK handing happens after we have a
+        * valid VMA, so we should never reach this without a
+        * valid VMA.
+        */
+@@ -693,7 +673,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
+       if (!oops_may_print())
+               return;
+ 
+-      if (error_code & PF_INSTR) {
++      if (error_code & X86_PF_INSTR) {
+               unsigned int level;
+               pgd_t *pgd;
+               pte_t *pte;
+@@ -775,7 +755,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
+                */
+               if (current->thread.sig_on_uaccess_err && signal) {
+                       tsk->thread.trap_nr = X86_TRAP_PF;
+-                      tsk->thread.error_code = error_code | PF_USER;
++                      tsk->thread.error_code = error_code | X86_PF_USER;
+                       tsk->thread.cr2 = address;
+ 
+                       /* XXX: hwpoison faults will set the wrong code. */
+@@ -894,7 +874,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
+       struct task_struct *tsk = current;
+ 
+       /* User mode accesses just cause a SIGSEGV */
+-      if (error_code & PF_USER) {
++      if (error_code & X86_PF_USER) {
+               /*
+                * It's possible to have interrupts off here:
+                */
+@@ -915,7 +895,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
+                * Instruction fetch faults in the vsyscall page might need
+                * emulation.
+                */
+-              if (unlikely((error_code & PF_INSTR) &&
++              if (unlikely((error_code & X86_PF_INSTR) &&
+                            ((address & ~0xfff) == VSYSCALL_ADDR))) {
+                       if (emulate_vsyscall(regs, address))
+                               return;
+@@ -928,7 +908,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
+                * are always protection faults.
+                */
+               if (address >= TASK_SIZE_MAX)
+-                      error_code |= PF_PROT;
++                      error_code |= X86_PF_PROT;
+ 
+               if (likely(show_unhandled_signals))
+                       show_signal_msg(regs, error_code, address, tsk);
+@@ -989,11 +969,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
+ 
+       if (!boot_cpu_has(X86_FEATURE_OSPKE))
+               return false;
+-      if (error_code & PF_PK)
++      if (error_code & X86_PF_PK)
+               return true;
+       /* this checks permission keys on the VMA: */
+-      if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
+-                              (error_code & PF_INSTR), foreign))
++      if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
++                                     (error_code & X86_PF_INSTR), foreign))
+               return true;
+       return false;
+ }
+@@ -1021,7 +1001,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
+       int code = BUS_ADRERR;
+ 
+       /* Kernel mode? Handle exceptions or die: */
+-      if (!(error_code & PF_USER)) {
++      if (!(error_code & X86_PF_USER)) {
+               no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
+               return;
+       }
+@@ -1049,14 +1029,14 @@ static noinline void
+ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
+              unsigned long address, u32 *pkey, unsigned int fault)
+ {
+-      if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
++      if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
+               no_context(regs, error_code, address, 0, 0);
+               return;
+       }
+ 
+       if (fault & VM_FAULT_OOM) {
+               /* Kernel mode? Handle exceptions or die: */
+-              if (!(error_code & PF_USER)) {
++              if (!(error_code & X86_PF_USER)) {
+                       no_context(regs, error_code, address,
+                                  SIGSEGV, SEGV_MAPERR);
+                       return;
+@@ -1081,16 +1061,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
+ 
+ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
+ {
+-      if ((error_code & PF_WRITE) && !pte_write(*pte))
++      if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
+               return 0;
+ 
+-      if ((error_code & PF_INSTR) && !pte_exec(*pte))
++      if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
+               return 0;
+       /*
+        * Note: We do not do lazy flushing on protection key
+-       * changes, so no spurious fault will ever set PF_PK.
++       * changes, so no spurious fault will ever set X86_PF_PK.
+        */
+-      if ((error_code & PF_PK))
++      if ((error_code & X86_PF_PK))
+               return 1;
+ 
+       return 1;
+@@ -1136,8 +1116,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
+        * change, so user accesses are not expected to cause spurious
+        * faults.
+        */
+-      if (error_code != (PF_WRITE | PF_PROT)
+-          && error_code != (PF_INSTR | PF_PROT))
++      if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
++          error_code != (X86_PF_INSTR | X86_PF_PROT))
+               return 0;
+ 
+       pgd = init_mm.pgd + pgd_index(address);
+@@ -1197,19 +1177,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
+        * always an unconditional error and can never result in
+        * a follow-up action to resolve the fault, like a COW.
+        */
+-      if (error_code & PF_PK)
++      if (error_code & X86_PF_PK)
+               return 1;
+ 
+       /*
+        * Make sure to check the VMA so that we do not perform
+-       * faults just to hit a PF_PK as soon as we fill in a
++       * faults just to hit a X86_PF_PK as soon as we fill in a
+        * page.
+        */
+-      if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
+-                              (error_code & PF_INSTR), foreign))
++      if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
++                                     (error_code & X86_PF_INSTR), foreign))
+               return 1;
+ 
+-      if (error_code & PF_WRITE) {
++      if (error_code & X86_PF_WRITE) {
+               /* write, present and write, not present: */
+               if (unlikely(!(vma->vm_flags & VM_WRITE)))
+                       return 1;
+@@ -1217,7 +1197,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
+       }
+ 
+       /* read, present: */
+-      if (unlikely(error_code & PF_PROT))
++      if (unlikely(error_code & X86_PF_PROT))
+               return 1;
+ 
+       /* read, not present: */
+@@ -1240,7 +1220,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
+       if (!static_cpu_has(X86_FEATURE_SMAP))
+               return false;
+ 
+-      if (error_code & PF_USER)
++      if (error_code & X86_PF_USER)
+               return false;
+ 
+       if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
+@@ -1293,7 +1273,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
+        * protection error (error_code & 9) == 0.
+        */
+       if (unlikely(fault_in_kernel_space(address))) {
+-              if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
++              if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
+                       if (vmalloc_fault(address) >= 0)
+                               return;
+ 
+@@ -1321,7 +1301,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
+       if (unlikely(kprobes_fault(regs)))
+               return;
+ 
+-      if (unlikely(error_code & PF_RSVD))
++      if (unlikely(error_code & X86_PF_RSVD))
+               pgtable_bad(regs, error_code, address);
+ 
+       if (unlikely(smap_violation(error_code, regs))) {
+@@ -1347,7 +1327,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
+        */
+       if (user_mode(regs)) {
+               local_irq_enable();
+-              error_code |= PF_USER;
++              error_code |= X86_PF_USER;
+               flags |= FAULT_FLAG_USER;
+       } else {
+               if (regs->flags & X86_EFLAGS_IF)
+@@ -1356,9 +1336,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
+ 
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+ 
+-      if (error_code & PF_WRITE)
++      if (error_code & X86_PF_WRITE)
+               flags |= FAULT_FLAG_WRITE;
+-      if (error_code & PF_INSTR)
++      if (error_code & X86_PF_INSTR)
+               flags |= FAULT_FLAG_INSTRUCTION;
+ 
+       /*
+@@ -1378,7 +1358,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
+        * space check, thus avoiding the deadlock:
+        */
+       if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
+-              if ((error_code & PF_USER) == 0 &&
++              if (!(error_code & X86_PF_USER) &&
+                   !search_exception_tables(regs->ip)) {
+                       bad_area_nosemaphore(regs, error_code, address, NULL);
+                       return;
+@@ -1405,7 +1385,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
+               bad_area(regs, error_code, address);
+               return;
+       }
+-      if (error_code & PF_USER) {
++      if (error_code & X86_PF_USER) {
+               /*
+                * Accessing the stack below %sp is always a bug.
+                * The large cushion allows instructions like enter
+-- 
+2.14.2
+
diff --git a/patches/kernel/0087-ptrace-x86-Make-user_64bit_mode-available-to-32-bit-.patch b/patches/kernel/0087-ptrace-x86-Make-user_64bit_mode-available-to-32-bit-.patch

deleted file mode 100644 (file)

index 65e6b7c..0000000
--- a/patches/kernel/0087-ptrace-x86-Make-user_64bit_mode-available-to-32-bit-.patch
+++ /dev/null
@@ -1,92 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
-Date: Fri, 27 Oct 2017 13:25:30 -0700
-Subject: [PATCH] ptrace,x86: Make user_64bit_mode() available to 32-bit builds
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-In its current form, user_64bit_mode() can only be used when CONFIG_X86_64
-is selected. This implies that code built with CONFIG_X86_64=n cannot use
-it. If a piece of code needs to be built for both CONFIG_X86_64=y and
-CONFIG_X86_64=n and wants to use this function, it needs to wrap it in
-an #ifdef/#endif; potentially, in multiple places.
-
-This can be easily avoided with a single #ifdef/#endif pair within
-user_64bit_mode() itself.
-
-Suggested-by: Borislav Petkov <bp@suse.de>
-Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: "Michael S. Tsirkin" <mst@redhat.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: ricardo.neri@intel.com
-Cc: Adrian Hunter <adrian.hunter@intel.com>
-Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
-Cc: Huang Rui <ray.huang@amd.com>
-Cc: Qiaowei Ren <qiaowei.ren@intel.com>
-Cc: Shuah Khan <shuah@kernel.org>
-Cc: Kees Cook <keescook@chromium.org>
-Cc: Jonathan Corbet <corbet@lwn.net>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Dmitry Vyukov <dvyukov@google.com>
-Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com>
-Cc: Chris Metcalf <cmetcalf@mellanox.com>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Colin Ian King <colin.king@canonical.com>
-Cc: Chen Yucong <slaoub@gmail.com>
-Cc: Adam Buchbinder <adam.buchbinder@gmail.com>
-Cc: Vlastimil Babka <vbabka@suse.cz>
-Cc: Lorenzo Stoakes <lstoakes@gmail.com>
-Cc: Masami Hiramatsu <mhiramat@kernel.org>
-Cc: Paolo Bonzini <pbonzini@redhat.com>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Thomas Garnier <thgarnie@google.com>
-Link: https://lkml.kernel.org/r/1509135945-13762-4-git-send-email-ricardo.neri-calderon@linux.intel.com
-
-(cherry picked from commit e27c310af5c05cf876d9cad006928076c27f54d4)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 20ddf08f867d3d96788299cd2fb7676590d64250)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/ptrace.h | 6 +++++-
- 1 file changed, 5 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
-index 2b5d686ea9f3..ea78a8438a8a 100644
---- a/arch/x86/include/asm/ptrace.h
-+++ b/arch/x86/include/asm/ptrace.h
-@@ -115,9 +115,9 @@ static inline int v8086_mode(struct pt_regs *regs)
- #endif
- }
- 
--#ifdef CONFIG_X86_64
- static inline bool user_64bit_mode(struct pt_regs *regs)
- {
-+#ifdef CONFIG_X86_64
- #ifndef CONFIG_PARAVIRT
-       /*
-        * On non-paravirt systems, this is the only long mode CPL 3
-@@ -128,8 +128,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
-       /* Headers are too twisted for this to go in paravirt.h. */
-       return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
- #endif
-+#else /* !CONFIG_X86_64 */
-+      return false;
-+#endif
- }
- 
-+#ifdef CONFIG_X86_64
- #define current_user_stack_pointer()  current_pt_regs()->sp
- #define compat_user_stack_pointer()   current_pt_regs()->sp
- #endif
--- 
-2.14.2
-
diff --git a/patches/kernel/0087-x86-boot-Relocate-definition-of-the-initial-state-of.patch b/patches/kernel/0087-x86-boot-Relocate-definition-of-the-initial-state-of.patch

new file mode 100644 (file)

index 0000000..936d6b0
--- /dev/null
+++ b/patches/kernel/0087-x86-boot-Relocate-definition-of-the-initial-state-of.patch
@@ -0,0 +1,103 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Fri, 27 Oct 2017 13:25:29 -0700
+Subject: [PATCH] x86/boot: Relocate definition of the initial state of CR0
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Both head_32.S and head_64.S utilize the same value to initialize the
+control register CR0. Also, other parts of the kernel might want to access
+this initial definition (e.g., emulation code for User-Mode Instruction
+Prevention uses this state to provide a sane dummy value for CR0 when
+emulating the smsw instruction). Thus, relocate this definition to a
+header file from which it can be conveniently accessed.
+
+Suggested-by: Borislav Petkov <bp@alien8.de>
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Andy Lutomirski <luto@kernel.org>
+Cc: "Michael S. Tsirkin" <mst@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: ricardo.neri@intel.com
+Cc: linux-mm@kvack.org
+Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
+Cc: Huang Rui <ray.huang@amd.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: linux-arch@vger.kernel.org
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Chris Metcalf <cmetcalf@mellanox.com>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Chen Yucong <slaoub@gmail.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: https://lkml.kernel.org/r/1509135945-13762-3-git-send-email-ricardo.neri-calderon@linux.intel.com
+
+(cherry picked from commit b0ce5b8c95c83a7b98c679b117e3d6ae6f97154b)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 27c31a88c22edab269abe17c0ac7db0351d26c5f)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/uapi/asm/processor-flags.h | 3 +++
+ arch/x86/kernel/head_32.S                   | 3 ---
+ arch/x86/kernel/head_64.S                   | 3 ---
+ 3 files changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
+index 185f3d10c194..39946d0a1d41 100644
+--- a/arch/x86/include/uapi/asm/processor-flags.h
++++ b/arch/x86/include/uapi/asm/processor-flags.h
+@@ -151,5 +151,8 @@
+ #define CX86_ARR_BASE 0xc4
+ #define CX86_RCR_BASE 0xdc
+ 
++#define CR0_STATE     (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
++                       X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
++                       X86_CR0_PG)
+ 
+ #endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
+diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
+index 337a65377baf..7bbcdb1ea31a 100644
+--- a/arch/x86/kernel/head_32.S
++++ b/arch/x86/kernel/head_32.S
+@@ -213,9 +213,6 @@ ENTRY(startup_32_smp)
+ #endif
+ 
+ .Ldefault_entry:
+-#define CR0_STATE     (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
+-                       X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
+-                       X86_CR0_PG)
+       movl $(CR0_STATE & ~X86_CR0_PG),%eax
+       movl %eax,%cr0
+ 
+diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
+index a2d8541b1da4..4117c1e0b3d2 100644
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -137,9 +137,6 @@ ENTRY(secondary_startup_64)
+ 1:    wrmsr                           /* Make changes effective */
+ 
+       /* Setup cr0 */
+-#define CR0_STATE     (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
+-                       X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
+-                       X86_CR0_PG)
+       movl    $CR0_STATE, %eax
+       /* Make changes effective */
+       movq    %rax, %cr0
+-- 
+2.14.2
+
diff --git a/patches/kernel/0088-ptrace-x86-Make-user_64bit_mode-available-to-32-bit-.patch b/patches/kernel/0088-ptrace-x86-Make-user_64bit_mode-available-to-32-bit-.patch

new file mode 100644 (file)

index 0000000..65e6b7c
--- /dev/null
+++ b/patches/kernel/0088-ptrace-x86-Make-user_64bit_mode-available-to-32-bit-.patch
@@ -0,0 +1,92 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Fri, 27 Oct 2017 13:25:30 -0700
+Subject: [PATCH] ptrace,x86: Make user_64bit_mode() available to 32-bit builds
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+In its current form, user_64bit_mode() can only be used when CONFIG_X86_64
+is selected. This implies that code built with CONFIG_X86_64=n cannot use
+it. If a piece of code needs to be built for both CONFIG_X86_64=y and
+CONFIG_X86_64=n and wants to use this function, it needs to wrap it in
+an #ifdef/#endif; potentially, in multiple places.
+
+This can be easily avoided with a single #ifdef/#endif pair within
+user_64bit_mode() itself.
+
+Suggested-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: "Michael S. Tsirkin" <mst@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: ricardo.neri@intel.com
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
+Cc: Huang Rui <ray.huang@amd.com>
+Cc: Qiaowei Ren <qiaowei.ren@intel.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com>
+Cc: Chris Metcalf <cmetcalf@mellanox.com>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Colin Ian King <colin.king@canonical.com>
+Cc: Chen Yucong <slaoub@gmail.com>
+Cc: Adam Buchbinder <adam.buchbinder@gmail.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Lorenzo Stoakes <lstoakes@gmail.com>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Thomas Garnier <thgarnie@google.com>
+Link: https://lkml.kernel.org/r/1509135945-13762-4-git-send-email-ricardo.neri-calderon@linux.intel.com
+
+(cherry picked from commit e27c310af5c05cf876d9cad006928076c27f54d4)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 20ddf08f867d3d96788299cd2fb7676590d64250)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/ptrace.h | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
+index 2b5d686ea9f3..ea78a8438a8a 100644
+--- a/arch/x86/include/asm/ptrace.h
++++ b/arch/x86/include/asm/ptrace.h
+@@ -115,9 +115,9 @@ static inline int v8086_mode(struct pt_regs *regs)
+ #endif
+ }
+ 
+-#ifdef CONFIG_X86_64
+ static inline bool user_64bit_mode(struct pt_regs *regs)
+ {
++#ifdef CONFIG_X86_64
+ #ifndef CONFIG_PARAVIRT
+       /*
+        * On non-paravirt systems, this is the only long mode CPL 3
+@@ -128,8 +128,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
+       /* Headers are too twisted for this to go in paravirt.h. */
+       return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
+ #endif
++#else /* !CONFIG_X86_64 */
++      return false;
++#endif
+ }
+ 
++#ifdef CONFIG_X86_64
+ #define current_user_stack_pointer()  current_pt_regs()->sp
+ #define compat_user_stack_pointer()   current_pt_regs()->sp
+ #endif
+-- 
+2.14.2
+
diff --git a/patches/kernel/0088-x86-entry-64-Remove-the-restore_c_regs_and_iret-labe.patch b/patches/kernel/0088-x86-entry-64-Remove-the-restore_c_regs_and_iret-labe.patch

deleted file mode 100644 (file)

index 436f7da..0000000
--- a/patches/kernel/0088-x86-entry-64-Remove-the-restore_c_regs_and_iret-labe.patch
+++ /dev/null
@@ -1,74 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:58:58 -0700
-Subject: [PATCH] x86/entry/64: Remove the restore_c_regs_and_iret label
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The only user was the 64-bit opportunistic SYSRET failure path, and
-that path didn't really need it.  This change makes the
-opportunistic SYSRET code a bit more straightforward and gets rid of
-the label.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/be3006a7ad3326e3458cf1cc55d416252cbe1986.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 9da78ba6b47b46428cfdfc0851511ab29c869798)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 629c8b858cbe72e88e7f44a8f10e1b434ab80721)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S | 5 ++---
- 1 file changed, 2 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 34adfe0221d2..fac354ddf056 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -245,7 +245,6 @@ entry_SYSCALL64_slow_path:
-       call    do_syscall_64           /* returns with IRQs disabled */
- 
- return_from_SYSCALL_64:
--      RESTORE_EXTRA_REGS
-       TRACE_IRQS_IRETQ                /* we're about to change IF */
- 
-       /*
-@@ -314,6 +313,7 @@ return_from_SYSCALL_64:
-        */
- syscall_return_via_sysret:
-       /* rcx and r11 are already restored (see code above) */
-+      RESTORE_EXTRA_REGS
-       RESTORE_C_REGS_EXCEPT_RCX_R11
-       movq    RSP(%rsp), %rsp
-       UNWIND_HINT_EMPTY
-@@ -321,7 +321,7 @@ syscall_return_via_sysret:
- 
- opportunistic_sysret_failed:
-       SWAPGS
--      jmp     restore_c_regs_and_iret
-+      jmp     restore_regs_and_iret
- END(entry_SYSCALL_64)
- 
- ENTRY(stub_ptregs_64)
-@@ -638,7 +638,6 @@ retint_kernel:
-  */
- GLOBAL(restore_regs_and_iret)
-       RESTORE_EXTRA_REGS
--restore_c_regs_and_iret:
-       RESTORE_C_REGS
-       REMOVE_PT_GPREGS_FROM_STACK 8
-       INTERRUPT_RETURN
--- 
-2.14.2
-
diff --git a/patches/kernel/0089-x86-entry-64-Remove-the-restore_c_regs_and_iret-labe.patch b/patches/kernel/0089-x86-entry-64-Remove-the-restore_c_regs_and_iret-labe.patch

new file mode 100644 (file)

index 0000000..436f7da
--- /dev/null
+++ b/patches/kernel/0089-x86-entry-64-Remove-the-restore_c_regs_and_iret-labe.patch
@@ -0,0 +1,74 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:58:58 -0700
+Subject: [PATCH] x86/entry/64: Remove the restore_c_regs_and_iret label
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The only user was the 64-bit opportunistic SYSRET failure path, and
+that path didn't really need it.  This change makes the
+opportunistic SYSRET code a bit more straightforward and gets rid of
+the label.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/be3006a7ad3326e3458cf1cc55d416252cbe1986.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 9da78ba6b47b46428cfdfc0851511ab29c869798)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 629c8b858cbe72e88e7f44a8f10e1b434ab80721)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 34adfe0221d2..fac354ddf056 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -245,7 +245,6 @@ entry_SYSCALL64_slow_path:
+       call    do_syscall_64           /* returns with IRQs disabled */
+ 
+ return_from_SYSCALL_64:
+-      RESTORE_EXTRA_REGS
+       TRACE_IRQS_IRETQ                /* we're about to change IF */
+ 
+       /*
+@@ -314,6 +313,7 @@ return_from_SYSCALL_64:
+        */
+ syscall_return_via_sysret:
+       /* rcx and r11 are already restored (see code above) */
++      RESTORE_EXTRA_REGS
+       RESTORE_C_REGS_EXCEPT_RCX_R11
+       movq    RSP(%rsp), %rsp
+       UNWIND_HINT_EMPTY
+@@ -321,7 +321,7 @@ syscall_return_via_sysret:
+ 
+ opportunistic_sysret_failed:
+       SWAPGS
+-      jmp     restore_c_regs_and_iret
++      jmp     restore_regs_and_iret
+ END(entry_SYSCALL_64)
+ 
+ ENTRY(stub_ptregs_64)
+@@ -638,7 +638,6 @@ retint_kernel:
+  */
+ GLOBAL(restore_regs_and_iret)
+       RESTORE_EXTRA_REGS
+-restore_c_regs_and_iret:
+       RESTORE_C_REGS
+       REMOVE_PT_GPREGS_FROM_STACK 8
+       INTERRUPT_RETURN
+-- 
+2.14.2
+
diff --git a/patches/kernel/0089-x86-entry-64-Split-the-IRET-to-user-and-IRET-to-kern.patch b/patches/kernel/0089-x86-entry-64-Split-the-IRET-to-user-and-IRET-to-kern.patch

deleted file mode 100644 (file)

index 960c7be..0000000
--- a/patches/kernel/0089-x86-entry-64-Split-the-IRET-to-user-and-IRET-to-kern.patch
+++ /dev/null
@@ -1,134 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:58:59 -0700
-Subject: [PATCH] x86/entry/64: Split the IRET-to-user and IRET-to-kernel paths
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-These code paths will diverge soon.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/dccf8c7b3750199b4b30383c812d4e2931811509.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 26c4ef9c49d8a0341f6d97ce2cfdd55d1236ed29)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 64adfba0aeb668304d171c383ac80b22158ec128)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S        | 34 +++++++++++++++++++++++++---------
- arch/x86/entry/entry_64_compat.S |  2 +-
- arch/x86/kernel/head_64.S        |  2 +-
- 3 files changed, 27 insertions(+), 11 deletions(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index fac354ddf056..e546441fbec3 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -321,7 +321,7 @@ syscall_return_via_sysret:
- 
- opportunistic_sysret_failed:
-       SWAPGS
--      jmp     restore_regs_and_iret
-+      jmp     restore_regs_and_return_to_usermode
- END(entry_SYSCALL_64)
- 
- ENTRY(stub_ptregs_64)
-@@ -423,7 +423,7 @@ ENTRY(ret_from_fork)
-       call    syscall_return_slowpath /* returns with IRQs disabled */
-       TRACE_IRQS_ON                   /* user mode is traced as IRQS on */
-       SWAPGS
--      jmp     restore_regs_and_iret
-+      jmp     restore_regs_and_return_to_usermode
- 
- 1:
-       /* kernel thread */
-@@ -612,7 +612,20 @@ GLOBAL(retint_user)
-       call    prepare_exit_to_usermode
-       TRACE_IRQS_IRETQ
-       SWAPGS
--      jmp     restore_regs_and_iret
-+
-+GLOBAL(restore_regs_and_return_to_usermode)
-+#ifdef CONFIG_DEBUG_ENTRY
-+      /* Assert that pt_regs indicates user mode. */
-+      testl   $3, CS(%rsp)
-+      jnz     1f
-+      ud2
-+1:
-+#endif
-+      RESTORE_EXTRA_REGS
-+      RESTORE_C_REGS
-+      REMOVE_PT_GPREGS_FROM_STACK 8
-+      INTERRUPT_RETURN
-+
- 
- /* Returning to kernel space */
- retint_kernel:
-@@ -632,11 +645,14 @@ retint_kernel:
-        */
-       TRACE_IRQS_IRETQ
- 
--/*
-- * At this label, code paths which return to kernel and to user,
-- * which come from interrupts/exception and from syscalls, merge.
-- */
--GLOBAL(restore_regs_and_iret)
-+GLOBAL(restore_regs_and_return_to_kernel)
-+#ifdef CONFIG_DEBUG_ENTRY
-+      /* Assert that pt_regs indicates kernel mode. */
-+      testl   $3, CS(%rsp)
-+      jz      1f
-+      ud2
-+1:
-+#endif
-       RESTORE_EXTRA_REGS
-       RESTORE_C_REGS
-       REMOVE_PT_GPREGS_FROM_STACK 8
-@@ -1340,7 +1356,7 @@ ENTRY(nmi)
-        * work, because we don't want to enable interrupts.
-        */
-       SWAPGS
--      jmp     restore_regs_and_iret
-+      jmp     restore_regs_and_return_to_usermode
- 
- .Lnmi_from_kernel:
-       /*
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
-index d8468ba24be0..2b3a88feaa2b 100644
---- a/arch/x86/entry/entry_64_compat.S
-+++ b/arch/x86/entry/entry_64_compat.S
-@@ -337,7 +337,7 @@ ENTRY(entry_INT80_compat)
-       /* Go back to user mode. */
-       TRACE_IRQS_ON
-       SWAPGS
--      jmp     restore_regs_and_iret
-+      jmp     restore_regs_and_return_to_usermode
- END(entry_INT80_compat)
- 
-       ALIGN
-diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
-index 4117c1e0b3d2..e785734980ad 100644
---- a/arch/x86/kernel/head_64.S
-+++ b/arch/x86/kernel/head_64.S
-@@ -311,7 +311,7 @@ early_idt_handler_common:
- 
- 20:
-       decl early_recursion_flag(%rip)
--      jmp restore_regs_and_iret
-+      jmp restore_regs_and_return_to_kernel
- END(early_idt_handler_common)
- 
-       __INITDATA
--- 
-2.14.2
-
diff --git a/patches/kernel/0090-x86-entry-64-Move-SWAPGS-into-the-common-IRET-to-use.patch b/patches/kernel/0090-x86-entry-64-Move-SWAPGS-into-the-common-IRET-to-use.patch

deleted file mode 100644 (file)

index 81edf0f..0000000
--- a/patches/kernel/0090-x86-entry-64-Move-SWAPGS-into-the-common-IRET-to-use.patch
+++ /dev/null
@@ -1,156 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:00 -0700
-Subject: [PATCH] x86/entry/64: Move SWAPGS into the common IRET-to-usermode
- path
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-All of the code paths that ended up doing IRET to usermode did
-SWAPGS immediately beforehand.  Move the SWAPGS into the common
-code.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/27fd6f45b7cd640de38fb9066fd0349bcd11f8e1.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 8a055d7f411d41755ce30db5bb65b154777c4b78)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 62a85594f9be3baeb2495089f1c2980bc497d03b)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S        | 32 ++++++++++++++------------------
- arch/x86/entry/entry_64_compat.S |  3 +--
- 2 files changed, 15 insertions(+), 20 deletions(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index e546441fbec3..7c8258e3ad2d 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -249,12 +249,14 @@ return_from_SYSCALL_64:
- 
-       /*
-        * Try to use SYSRET instead of IRET if we're returning to
--       * a completely clean 64-bit userspace context.
-+       * a completely clean 64-bit userspace context.  If we're not,
-+       * go to the slow exit path.
-        */
-       movq    RCX(%rsp), %rcx
-       movq    RIP(%rsp), %r11
--      cmpq    %rcx, %r11                      /* RCX == RIP */
--      jne     opportunistic_sysret_failed
-+
-+      cmpq    %rcx, %r11      /* SYSRET requires RCX == RIP */
-+      jne     swapgs_restore_regs_and_return_to_usermode
- 
-       /*
-        * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
-@@ -272,14 +274,14 @@ return_from_SYSCALL_64:
- 
-       /* If this changed %rcx, it was not canonical */
-       cmpq    %rcx, %r11
--      jne     opportunistic_sysret_failed
-+      jne     swapgs_restore_regs_and_return_to_usermode
- 
-       cmpq    $__USER_CS, CS(%rsp)            /* CS must match SYSRET */
--      jne     opportunistic_sysret_failed
-+      jne     swapgs_restore_regs_and_return_to_usermode
- 
-       movq    R11(%rsp), %r11
-       cmpq    %r11, EFLAGS(%rsp)              /* R11 == RFLAGS */
--      jne     opportunistic_sysret_failed
-+      jne     swapgs_restore_regs_and_return_to_usermode
- 
-       /*
-        * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
-@@ -300,12 +302,12 @@ return_from_SYSCALL_64:
-        * would never get past 'stuck_here'.
-        */
-       testq   $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
--      jnz     opportunistic_sysret_failed
-+      jnz     swapgs_restore_regs_and_return_to_usermode
- 
-       /* nothing to check for RSP */
- 
-       cmpq    $__USER_DS, SS(%rsp)            /* SS must match SYSRET */
--      jne     opportunistic_sysret_failed
-+      jne     swapgs_restore_regs_and_return_to_usermode
- 
-       /*
-        * We win! This label is here just for ease of understanding
-@@ -318,10 +320,6 @@ syscall_return_via_sysret:
-       movq    RSP(%rsp), %rsp
-       UNWIND_HINT_EMPTY
-       USERGS_SYSRET64
--
--opportunistic_sysret_failed:
--      SWAPGS
--      jmp     restore_regs_and_return_to_usermode
- END(entry_SYSCALL_64)
- 
- ENTRY(stub_ptregs_64)
-@@ -422,8 +420,7 @@ ENTRY(ret_from_fork)
-       movq    %rsp, %rdi
-       call    syscall_return_slowpath /* returns with IRQs disabled */
-       TRACE_IRQS_ON                   /* user mode is traced as IRQS on */
--      SWAPGS
--      jmp     restore_regs_and_return_to_usermode
-+      jmp     swapgs_restore_regs_and_return_to_usermode
- 
- 1:
-       /* kernel thread */
-@@ -611,9 +608,8 @@ GLOBAL(retint_user)
-       mov     %rsp,%rdi
-       call    prepare_exit_to_usermode
-       TRACE_IRQS_IRETQ
--      SWAPGS
- 
--GLOBAL(restore_regs_and_return_to_usermode)
-+GLOBAL(swapgs_restore_regs_and_return_to_usermode)
- #ifdef CONFIG_DEBUG_ENTRY
-       /* Assert that pt_regs indicates user mode. */
-       testl   $3, CS(%rsp)
-@@ -621,6 +617,7 @@ GLOBAL(restore_regs_and_return_to_usermode)
-       ud2
- 1:
- #endif
-+      SWAPGS
-       RESTORE_EXTRA_REGS
-       RESTORE_C_REGS
-       REMOVE_PT_GPREGS_FROM_STACK 8
-@@ -1355,8 +1352,7 @@ ENTRY(nmi)
-        * Return back to user mode.  We must *not* do the normal exit
-        * work, because we don't want to enable interrupts.
-        */
--      SWAPGS
--      jmp     restore_regs_and_return_to_usermode
-+      jmp     swapgs_restore_regs_and_return_to_usermode
- 
- .Lnmi_from_kernel:
-       /*
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
-index 2b3a88feaa2b..be745b7a3e3e 100644
---- a/arch/x86/entry/entry_64_compat.S
-+++ b/arch/x86/entry/entry_64_compat.S
-@@ -336,8 +336,7 @@ ENTRY(entry_INT80_compat)
- 
-       /* Go back to user mode. */
-       TRACE_IRQS_ON
--      SWAPGS
--      jmp     restore_regs_and_return_to_usermode
-+      jmp     swapgs_restore_regs_and_return_to_usermode
- END(entry_INT80_compat)
- 
-       ALIGN
--- 
-2.14.2
-
diff --git a/patches/kernel/0090-x86-entry-64-Split-the-IRET-to-user-and-IRET-to-kern.patch b/patches/kernel/0090-x86-entry-64-Split-the-IRET-to-user-and-IRET-to-kern.patch

new file mode 100644 (file)

index 0000000..960c7be
--- /dev/null
+++ b/patches/kernel/0090-x86-entry-64-Split-the-IRET-to-user-and-IRET-to-kern.patch
@@ -0,0 +1,134 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:58:59 -0700
+Subject: [PATCH] x86/entry/64: Split the IRET-to-user and IRET-to-kernel paths
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+These code paths will diverge soon.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/dccf8c7b3750199b4b30383c812d4e2931811509.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 26c4ef9c49d8a0341f6d97ce2cfdd55d1236ed29)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 64adfba0aeb668304d171c383ac80b22158ec128)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S        | 34 +++++++++++++++++++++++++---------
+ arch/x86/entry/entry_64_compat.S |  2 +-
+ arch/x86/kernel/head_64.S        |  2 +-
+ 3 files changed, 27 insertions(+), 11 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index fac354ddf056..e546441fbec3 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -321,7 +321,7 @@ syscall_return_via_sysret:
+ 
+ opportunistic_sysret_failed:
+       SWAPGS
+-      jmp     restore_regs_and_iret
++      jmp     restore_regs_and_return_to_usermode
+ END(entry_SYSCALL_64)
+ 
+ ENTRY(stub_ptregs_64)
+@@ -423,7 +423,7 @@ ENTRY(ret_from_fork)
+       call    syscall_return_slowpath /* returns with IRQs disabled */
+       TRACE_IRQS_ON                   /* user mode is traced as IRQS on */
+       SWAPGS
+-      jmp     restore_regs_and_iret
++      jmp     restore_regs_and_return_to_usermode
+ 
+ 1:
+       /* kernel thread */
+@@ -612,7 +612,20 @@ GLOBAL(retint_user)
+       call    prepare_exit_to_usermode
+       TRACE_IRQS_IRETQ
+       SWAPGS
+-      jmp     restore_regs_and_iret
++
++GLOBAL(restore_regs_and_return_to_usermode)
++#ifdef CONFIG_DEBUG_ENTRY
++      /* Assert that pt_regs indicates user mode. */
++      testl   $3, CS(%rsp)
++      jnz     1f
++      ud2
++1:
++#endif
++      RESTORE_EXTRA_REGS
++      RESTORE_C_REGS
++      REMOVE_PT_GPREGS_FROM_STACK 8
++      INTERRUPT_RETURN
++
+ 
+ /* Returning to kernel space */
+ retint_kernel:
+@@ -632,11 +645,14 @@ retint_kernel:
+        */
+       TRACE_IRQS_IRETQ
+ 
+-/*
+- * At this label, code paths which return to kernel and to user,
+- * which come from interrupts/exception and from syscalls, merge.
+- */
+-GLOBAL(restore_regs_and_iret)
++GLOBAL(restore_regs_and_return_to_kernel)
++#ifdef CONFIG_DEBUG_ENTRY
++      /* Assert that pt_regs indicates kernel mode. */
++      testl   $3, CS(%rsp)
++      jz      1f
++      ud2
++1:
++#endif
+       RESTORE_EXTRA_REGS
+       RESTORE_C_REGS
+       REMOVE_PT_GPREGS_FROM_STACK 8
+@@ -1340,7 +1356,7 @@ ENTRY(nmi)
+        * work, because we don't want to enable interrupts.
+        */
+       SWAPGS
+-      jmp     restore_regs_and_iret
++      jmp     restore_regs_and_return_to_usermode
+ 
+ .Lnmi_from_kernel:
+       /*
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index d8468ba24be0..2b3a88feaa2b 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -337,7 +337,7 @@ ENTRY(entry_INT80_compat)
+       /* Go back to user mode. */
+       TRACE_IRQS_ON
+       SWAPGS
+-      jmp     restore_regs_and_iret
++      jmp     restore_regs_and_return_to_usermode
+ END(entry_INT80_compat)
+ 
+       ALIGN
+diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
+index 4117c1e0b3d2..e785734980ad 100644
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -311,7 +311,7 @@ early_idt_handler_common:
+ 
+ 20:
+       decl early_recursion_flag(%rip)
+-      jmp restore_regs_and_iret
++      jmp restore_regs_and_return_to_kernel
+ END(early_idt_handler_common)
+ 
+       __INITDATA
+-- 
+2.14.2
+
diff --git a/patches/kernel/0091-x86-entry-64-Move-SWAPGS-into-the-common-IRET-to-use.patch b/patches/kernel/0091-x86-entry-64-Move-SWAPGS-into-the-common-IRET-to-use.patch

new file mode 100644 (file)

index 0000000..81edf0f
--- /dev/null
+++ b/patches/kernel/0091-x86-entry-64-Move-SWAPGS-into-the-common-IRET-to-use.patch
@@ -0,0 +1,156 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:00 -0700
+Subject: [PATCH] x86/entry/64: Move SWAPGS into the common IRET-to-usermode
+ path
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+All of the code paths that ended up doing IRET to usermode did
+SWAPGS immediately beforehand.  Move the SWAPGS into the common
+code.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/27fd6f45b7cd640de38fb9066fd0349bcd11f8e1.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 8a055d7f411d41755ce30db5bb65b154777c4b78)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 62a85594f9be3baeb2495089f1c2980bc497d03b)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S        | 32 ++++++++++++++------------------
+ arch/x86/entry/entry_64_compat.S |  3 +--
+ 2 files changed, 15 insertions(+), 20 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index e546441fbec3..7c8258e3ad2d 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -249,12 +249,14 @@ return_from_SYSCALL_64:
+ 
+       /*
+        * Try to use SYSRET instead of IRET if we're returning to
+-       * a completely clean 64-bit userspace context.
++       * a completely clean 64-bit userspace context.  If we're not,
++       * go to the slow exit path.
+        */
+       movq    RCX(%rsp), %rcx
+       movq    RIP(%rsp), %r11
+-      cmpq    %rcx, %r11                      /* RCX == RIP */
+-      jne     opportunistic_sysret_failed
++
++      cmpq    %rcx, %r11      /* SYSRET requires RCX == RIP */
++      jne     swapgs_restore_regs_and_return_to_usermode
+ 
+       /*
+        * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+@@ -272,14 +274,14 @@ return_from_SYSCALL_64:
+ 
+       /* If this changed %rcx, it was not canonical */
+       cmpq    %rcx, %r11
+-      jne     opportunistic_sysret_failed
++      jne     swapgs_restore_regs_and_return_to_usermode
+ 
+       cmpq    $__USER_CS, CS(%rsp)            /* CS must match SYSRET */
+-      jne     opportunistic_sysret_failed
++      jne     swapgs_restore_regs_and_return_to_usermode
+ 
+       movq    R11(%rsp), %r11
+       cmpq    %r11, EFLAGS(%rsp)              /* R11 == RFLAGS */
+-      jne     opportunistic_sysret_failed
++      jne     swapgs_restore_regs_and_return_to_usermode
+ 
+       /*
+        * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
+@@ -300,12 +302,12 @@ return_from_SYSCALL_64:
+        * would never get past 'stuck_here'.
+        */
+       testq   $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
+-      jnz     opportunistic_sysret_failed
++      jnz     swapgs_restore_regs_and_return_to_usermode
+ 
+       /* nothing to check for RSP */
+ 
+       cmpq    $__USER_DS, SS(%rsp)            /* SS must match SYSRET */
+-      jne     opportunistic_sysret_failed
++      jne     swapgs_restore_regs_and_return_to_usermode
+ 
+       /*
+        * We win! This label is here just for ease of understanding
+@@ -318,10 +320,6 @@ syscall_return_via_sysret:
+       movq    RSP(%rsp), %rsp
+       UNWIND_HINT_EMPTY
+       USERGS_SYSRET64
+-
+-opportunistic_sysret_failed:
+-      SWAPGS
+-      jmp     restore_regs_and_return_to_usermode
+ END(entry_SYSCALL_64)
+ 
+ ENTRY(stub_ptregs_64)
+@@ -422,8 +420,7 @@ ENTRY(ret_from_fork)
+       movq    %rsp, %rdi
+       call    syscall_return_slowpath /* returns with IRQs disabled */
+       TRACE_IRQS_ON                   /* user mode is traced as IRQS on */
+-      SWAPGS
+-      jmp     restore_regs_and_return_to_usermode
++      jmp     swapgs_restore_regs_and_return_to_usermode
+ 
+ 1:
+       /* kernel thread */
+@@ -611,9 +608,8 @@ GLOBAL(retint_user)
+       mov     %rsp,%rdi
+       call    prepare_exit_to_usermode
+       TRACE_IRQS_IRETQ
+-      SWAPGS
+ 
+-GLOBAL(restore_regs_and_return_to_usermode)
++GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+ #ifdef CONFIG_DEBUG_ENTRY
+       /* Assert that pt_regs indicates user mode. */
+       testl   $3, CS(%rsp)
+@@ -621,6 +617,7 @@ GLOBAL(restore_regs_and_return_to_usermode)
+       ud2
+ 1:
+ #endif
++      SWAPGS
+       RESTORE_EXTRA_REGS
+       RESTORE_C_REGS
+       REMOVE_PT_GPREGS_FROM_STACK 8
+@@ -1355,8 +1352,7 @@ ENTRY(nmi)
+        * Return back to user mode.  We must *not* do the normal exit
+        * work, because we don't want to enable interrupts.
+        */
+-      SWAPGS
+-      jmp     restore_regs_and_return_to_usermode
++      jmp     swapgs_restore_regs_and_return_to_usermode
+ 
+ .Lnmi_from_kernel:
+       /*
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index 2b3a88feaa2b..be745b7a3e3e 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -336,8 +336,7 @@ ENTRY(entry_INT80_compat)
+ 
+       /* Go back to user mode. */
+       TRACE_IRQS_ON
+-      SWAPGS
+-      jmp     restore_regs_and_return_to_usermode
++      jmp     swapgs_restore_regs_and_return_to_usermode
+ END(entry_INT80_compat)
+ 
+       ALIGN
+-- 
+2.14.2
+
diff --git a/patches/kernel/0091-x86-entry-64-Simplify-reg-restore-code-in-the-standa.patch b/patches/kernel/0091-x86-entry-64-Simplify-reg-restore-code-in-the-standa.patch

deleted file mode 100644 (file)

index c413507..0000000
--- a/patches/kernel/0091-x86-entry-64-Simplify-reg-restore-code-in-the-standa.patch
+++ /dev/null
@@ -1,103 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:01 -0700
-Subject: [PATCH] x86/entry/64: Simplify reg restore code in the standard IRET
- paths
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The old code restored all the registers with movq instead of pop.
-
-In theory, this was done because some CPUs have higher movq
-throughput, but any gain there would be tiny and is almost certainly
-outweighed by the higher text size.
-
-This saves 96 bytes of text.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/ad82520a207ccd851b04ba613f4f752b33ac05f7.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit e872045bfd9c465a8555bab4b8567d56a4d2d3bb)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit f926575cd370de4052e89477582b349af5664a56)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/calling.h  | 21 +++++++++++++++++++++
- arch/x86/entry/entry_64.S | 12 ++++++------
- 2 files changed, 27 insertions(+), 6 deletions(-)
-
-diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
-index 640aafebdc00..0b9dd8123701 100644
---- a/arch/x86/entry/calling.h
-+++ b/arch/x86/entry/calling.h
-@@ -151,6 +151,27 @@ For 32-bit we have the following conventions - kernel is built with
-       UNWIND_HINT_REGS offset=\offset extra=0
-       .endm
- 
-+      .macro POP_EXTRA_REGS
-+      popq %r15
-+      popq %r14
-+      popq %r13
-+      popq %r12
-+      popq %rbp
-+      popq %rbx
-+      .endm
-+
-+      .macro POP_C_REGS
-+      popq %r11
-+      popq %r10
-+      popq %r9
-+      popq %r8
-+      popq %rax
-+      popq %rcx
-+      popq %rdx
-+      popq %rsi
-+      popq %rdi
-+      .endm
-+
-       .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
-       .if \rstor_r11
-       movq 6*8(%rsp), %r11
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 7c8258e3ad2d..a1a86e782a0e 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -618,9 +618,9 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
- 1:
- #endif
-       SWAPGS
--      RESTORE_EXTRA_REGS
--      RESTORE_C_REGS
--      REMOVE_PT_GPREGS_FROM_STACK 8
-+      POP_EXTRA_REGS
-+      POP_C_REGS
-+      addq    $8, %rsp        /* skip regs->orig_ax */
-       INTERRUPT_RETURN
- 
- 
-@@ -650,9 +650,9 @@ GLOBAL(restore_regs_and_return_to_kernel)
-       ud2
- 1:
- #endif
--      RESTORE_EXTRA_REGS
--      RESTORE_C_REGS
--      REMOVE_PT_GPREGS_FROM_STACK 8
-+      POP_EXTRA_REGS
-+      POP_C_REGS
-+      addq    $8, %rsp        /* skip regs->orig_ax */
-       INTERRUPT_RETURN
- 
- ENTRY(native_iret)
--- 
-2.14.2
-
diff --git a/patches/kernel/0092-x86-entry-64-Shrink-paranoid_exit_restore-and-make-l.patch b/patches/kernel/0092-x86-entry-64-Shrink-paranoid_exit_restore-and-make-l.patch

deleted file mode 100644 (file)

index dce86c4..0000000
--- a/patches/kernel/0092-x86-entry-64-Shrink-paranoid_exit_restore-and-make-l.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:02 -0700
-Subject: [PATCH] x86/entry/64: Shrink paranoid_exit_restore and make labels
- local
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-paranoid_exit_restore was a copy of restore_regs_and_return_to_kernel.
-Merge them and make the paranoid_exit internal labels local.
-
-Keeping .Lparanoid_exit makes the code a bit shorter because it
-allows a 2-byte jnz instead of a 5-byte jnz.
-
-Saves 96 bytes of text.
-
-( This is still a bit suboptimal in a non-CONFIG_TRACE_IRQFLAGS
-  kernel, but fixing that would make the code rather messy. )
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/510d66a1895cda9473c84b1086f0bb974f22de6a.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit e53178328c9b96fbdbc719e78c93b5687ee007c3)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit fb53fe10add935c3d0eb63199e43426eaf3b4299)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S | 13 +++++--------
- 1 file changed, 5 insertions(+), 8 deletions(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index a1a86e782a0e..6995f7e08aa1 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -1136,17 +1136,14 @@ ENTRY(paranoid_exit)
-       DISABLE_INTERRUPTS(CLBR_ANY)
-       TRACE_IRQS_OFF_DEBUG
-       testl   %ebx, %ebx                      /* swapgs needed? */
--      jnz     paranoid_exit_no_swapgs
-+      jnz     .Lparanoid_exit_no_swapgs
-       TRACE_IRQS_IRETQ
-       SWAPGS_UNSAFE_STACK
--      jmp     paranoid_exit_restore
--paranoid_exit_no_swapgs:
-+      jmp     .Lparanoid_exit_restore
-+.Lparanoid_exit_no_swapgs:
-       TRACE_IRQS_IRETQ_DEBUG
--paranoid_exit_restore:
--      RESTORE_EXTRA_REGS
--      RESTORE_C_REGS
--      REMOVE_PT_GPREGS_FROM_STACK 8
--      INTERRUPT_RETURN
-+.Lparanoid_exit_restore:
-+      jmp restore_regs_and_return_to_kernel
- END(paranoid_exit)
- 
- /*
--- 
-2.14.2
-
diff --git a/patches/kernel/0092-x86-entry-64-Simplify-reg-restore-code-in-the-standa.patch b/patches/kernel/0092-x86-entry-64-Simplify-reg-restore-code-in-the-standa.patch

new file mode 100644 (file)

index 0000000..c413507
--- /dev/null
+++ b/patches/kernel/0092-x86-entry-64-Simplify-reg-restore-code-in-the-standa.patch
@@ -0,0 +1,103 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:01 -0700
+Subject: [PATCH] x86/entry/64: Simplify reg restore code in the standard IRET
+ paths
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The old code restored all the registers with movq instead of pop.
+
+In theory, this was done because some CPUs have higher movq
+throughput, but any gain there would be tiny and is almost certainly
+outweighed by the higher text size.
+
+This saves 96 bytes of text.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/ad82520a207ccd851b04ba613f4f752b33ac05f7.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit e872045bfd9c465a8555bab4b8567d56a4d2d3bb)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit f926575cd370de4052e89477582b349af5664a56)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/calling.h  | 21 +++++++++++++++++++++
+ arch/x86/entry/entry_64.S | 12 ++++++------
+ 2 files changed, 27 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
+index 640aafebdc00..0b9dd8123701 100644
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -151,6 +151,27 @@ For 32-bit we have the following conventions - kernel is built with
+       UNWIND_HINT_REGS offset=\offset extra=0
+       .endm
+ 
++      .macro POP_EXTRA_REGS
++      popq %r15
++      popq %r14
++      popq %r13
++      popq %r12
++      popq %rbp
++      popq %rbx
++      .endm
++
++      .macro POP_C_REGS
++      popq %r11
++      popq %r10
++      popq %r9
++      popq %r8
++      popq %rax
++      popq %rcx
++      popq %rdx
++      popq %rsi
++      popq %rdi
++      .endm
++
+       .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
+       .if \rstor_r11
+       movq 6*8(%rsp), %r11
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 7c8258e3ad2d..a1a86e782a0e 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -618,9 +618,9 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+ 1:
+ #endif
+       SWAPGS
+-      RESTORE_EXTRA_REGS
+-      RESTORE_C_REGS
+-      REMOVE_PT_GPREGS_FROM_STACK 8
++      POP_EXTRA_REGS
++      POP_C_REGS
++      addq    $8, %rsp        /* skip regs->orig_ax */
+       INTERRUPT_RETURN
+ 
+ 
+@@ -650,9 +650,9 @@ GLOBAL(restore_regs_and_return_to_kernel)
+       ud2
+ 1:
+ #endif
+-      RESTORE_EXTRA_REGS
+-      RESTORE_C_REGS
+-      REMOVE_PT_GPREGS_FROM_STACK 8
++      POP_EXTRA_REGS
++      POP_C_REGS
++      addq    $8, %rsp        /* skip regs->orig_ax */
+       INTERRUPT_RETURN
+ 
+ ENTRY(native_iret)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0093-x86-entry-64-Shrink-paranoid_exit_restore-and-make-l.patch b/patches/kernel/0093-x86-entry-64-Shrink-paranoid_exit_restore-and-make-l.patch

new file mode 100644 (file)

index 0000000..dce86c4
--- /dev/null
+++ b/patches/kernel/0093-x86-entry-64-Shrink-paranoid_exit_restore-and-make-l.patch
@@ -0,0 +1,70 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:02 -0700
+Subject: [PATCH] x86/entry/64: Shrink paranoid_exit_restore and make labels
+ local
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+paranoid_exit_restore was a copy of restore_regs_and_return_to_kernel.
+Merge them and make the paranoid_exit internal labels local.
+
+Keeping .Lparanoid_exit makes the code a bit shorter because it
+allows a 2-byte jnz instead of a 5-byte jnz.
+
+Saves 96 bytes of text.
+
+( This is still a bit suboptimal in a non-CONFIG_TRACE_IRQFLAGS
+  kernel, but fixing that would make the code rather messy. )
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/510d66a1895cda9473c84b1086f0bb974f22de6a.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit e53178328c9b96fbdbc719e78c93b5687ee007c3)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit fb53fe10add935c3d0eb63199e43426eaf3b4299)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S | 13 +++++--------
+ 1 file changed, 5 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index a1a86e782a0e..6995f7e08aa1 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1136,17 +1136,14 @@ ENTRY(paranoid_exit)
+       DISABLE_INTERRUPTS(CLBR_ANY)
+       TRACE_IRQS_OFF_DEBUG
+       testl   %ebx, %ebx                      /* swapgs needed? */
+-      jnz     paranoid_exit_no_swapgs
++      jnz     .Lparanoid_exit_no_swapgs
+       TRACE_IRQS_IRETQ
+       SWAPGS_UNSAFE_STACK
+-      jmp     paranoid_exit_restore
+-paranoid_exit_no_swapgs:
++      jmp     .Lparanoid_exit_restore
++.Lparanoid_exit_no_swapgs:
+       TRACE_IRQS_IRETQ_DEBUG
+-paranoid_exit_restore:
+-      RESTORE_EXTRA_REGS
+-      RESTORE_C_REGS
+-      REMOVE_PT_GPREGS_FROM_STACK 8
+-      INTERRUPT_RETURN
++.Lparanoid_exit_restore:
++      jmp restore_regs_and_return_to_kernel
+ END(paranoid_exit)
+ 
+ /*
+-- 
+2.14.2
+
diff --git a/patches/kernel/0093-x86-entry-64-Use-pop-instead-of-movq-in-syscall_retu.patch b/patches/kernel/0093-x86-entry-64-Use-pop-instead-of-movq-in-syscall_retu.patch

deleted file mode 100644 (file)

index 557e590..0000000
--- a/patches/kernel/0093-x86-entry-64-Use-pop-instead-of-movq-in-syscall_retu.patch
+++ /dev/null
@@ -1,61 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:03 -0700
-Subject: [PATCH] x86/entry/64: Use pop instead of movq in
- syscall_return_via_sysret
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Saves 64 bytes.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/6609b7f74ab31c36604ad746e019ea8495aec76c.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 4fbb39108f972437c44e5ffa781b56635d496826)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 1e9a9d5ef9f65eeb26eb8f0974dd3e693894baf1)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S | 14 +++++++++++---
- 1 file changed, 11 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 6995f7e08aa1..33a416c7df2d 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -315,10 +315,18 @@ return_from_SYSCALL_64:
-        */
- syscall_return_via_sysret:
-       /* rcx and r11 are already restored (see code above) */
--      RESTORE_EXTRA_REGS
--      RESTORE_C_REGS_EXCEPT_RCX_R11
--      movq    RSP(%rsp), %rsp
-       UNWIND_HINT_EMPTY
-+      POP_EXTRA_REGS
-+      popq    %rsi    /* skip r11 */
-+      popq    %r10
-+      popq    %r9
-+      popq    %r8
-+      popq    %rax
-+      popq    %rsi    /* skip rcx */
-+      popq    %rdx
-+      popq    %rsi
-+      popq    %rdi
-+      movq    RSP-ORIG_RAX(%rsp), %rsp
-       USERGS_SYSRET64
- END(entry_SYSCALL_64)
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0094-x86-entry-64-Merge-the-fast-and-slow-SYSRET-paths.patch b/patches/kernel/0094-x86-entry-64-Merge-the-fast-and-slow-SYSRET-paths.patch

deleted file mode 100644 (file)

index 4fa0876..0000000
--- a/patches/kernel/0094-x86-entry-64-Merge-the-fast-and-slow-SYSRET-paths.patch
+++ /dev/null
@@ -1,60 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:04 -0700
-Subject: [PATCH] x86/entry/64: Merge the fast and slow SYSRET paths
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-They did almost the same thing.  Remove a bunch of pointless
-instructions (mostly hidden in macros) and reduce cognitive load by
-merging them.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/1204e20233fcab9130a1ba80b3b1879b5db3fc1f.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit a512210643da8082cb44181dba8b18e752bd68f0)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 7c4575d8bb2d01960ba9b9840fa22460e0179eca)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 33a416c7df2d..87be1cd1fa88 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -220,10 +220,9 @@ entry_SYSCALL_64_fastpath:
-       TRACE_IRQS_ON           /* user mode is traced as IRQs on */
-       movq    RIP(%rsp), %rcx
-       movq    EFLAGS(%rsp), %r11
--      RESTORE_C_REGS_EXCEPT_RCX_R11
--      movq    RSP(%rsp), %rsp
-+      addq    $6*8, %rsp      /* skip extra regs -- they were preserved */
-       UNWIND_HINT_EMPTY
--      USERGS_SYSRET64
-+      jmp     .Lpop_c_regs_except_rcx_r11_and_sysret
- 
- 1:
-       /*
-@@ -317,6 +316,7 @@ syscall_return_via_sysret:
-       /* rcx and r11 are already restored (see code above) */
-       UNWIND_HINT_EMPTY
-       POP_EXTRA_REGS
-+.Lpop_c_regs_except_rcx_r11_and_sysret:
-       popq    %rsi    /* skip r11 */
-       popq    %r10
-       popq    %r9
--- 
-2.14.2
-
diff --git a/patches/kernel/0094-x86-entry-64-Use-pop-instead-of-movq-in-syscall_retu.patch b/patches/kernel/0094-x86-entry-64-Use-pop-instead-of-movq-in-syscall_retu.patch

new file mode 100644 (file)

index 0000000..557e590
--- /dev/null
+++ b/patches/kernel/0094-x86-entry-64-Use-pop-instead-of-movq-in-syscall_retu.patch
@@ -0,0 +1,61 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:03 -0700
+Subject: [PATCH] x86/entry/64: Use pop instead of movq in
+ syscall_return_via_sysret
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Saves 64 bytes.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/6609b7f74ab31c36604ad746e019ea8495aec76c.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 4fbb39108f972437c44e5ffa781b56635d496826)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 1e9a9d5ef9f65eeb26eb8f0974dd3e693894baf1)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 6995f7e08aa1..33a416c7df2d 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -315,10 +315,18 @@ return_from_SYSCALL_64:
+        */
+ syscall_return_via_sysret:
+       /* rcx and r11 are already restored (see code above) */
+-      RESTORE_EXTRA_REGS
+-      RESTORE_C_REGS_EXCEPT_RCX_R11
+-      movq    RSP(%rsp), %rsp
+       UNWIND_HINT_EMPTY
++      POP_EXTRA_REGS
++      popq    %rsi    /* skip r11 */
++      popq    %r10
++      popq    %r9
++      popq    %r8
++      popq    %rax
++      popq    %rsi    /* skip rcx */
++      popq    %rdx
++      popq    %rsi
++      popq    %rdi
++      movq    RSP-ORIG_RAX(%rsp), %rsp
+       USERGS_SYSRET64
+ END(entry_SYSCALL_64)
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0095-x86-entry-64-Merge-the-fast-and-slow-SYSRET-paths.patch b/patches/kernel/0095-x86-entry-64-Merge-the-fast-and-slow-SYSRET-paths.patch

new file mode 100644 (file)

index 0000000..4fa0876
--- /dev/null
+++ b/patches/kernel/0095-x86-entry-64-Merge-the-fast-and-slow-SYSRET-paths.patch
@@ -0,0 +1,60 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:04 -0700
+Subject: [PATCH] x86/entry/64: Merge the fast and slow SYSRET paths
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+They did almost the same thing.  Remove a bunch of pointless
+instructions (mostly hidden in macros) and reduce cognitive load by
+merging them.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1204e20233fcab9130a1ba80b3b1879b5db3fc1f.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit a512210643da8082cb44181dba8b18e752bd68f0)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 7c4575d8bb2d01960ba9b9840fa22460e0179eca)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 33a416c7df2d..87be1cd1fa88 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -220,10 +220,9 @@ entry_SYSCALL_64_fastpath:
+       TRACE_IRQS_ON           /* user mode is traced as IRQs on */
+       movq    RIP(%rsp), %rcx
+       movq    EFLAGS(%rsp), %r11
+-      RESTORE_C_REGS_EXCEPT_RCX_R11
+-      movq    RSP(%rsp), %rsp
++      addq    $6*8, %rsp      /* skip extra regs -- they were preserved */
+       UNWIND_HINT_EMPTY
+-      USERGS_SYSRET64
++      jmp     .Lpop_c_regs_except_rcx_r11_and_sysret
+ 
+ 1:
+       /*
+@@ -317,6 +316,7 @@ syscall_return_via_sysret:
+       /* rcx and r11 are already restored (see code above) */
+       UNWIND_HINT_EMPTY
+       POP_EXTRA_REGS
++.Lpop_c_regs_except_rcx_r11_and_sysret:
+       popq    %rsi    /* skip r11 */
+       popq    %r10
+       popq    %r9
+-- 
+2.14.2
+
diff --git a/patches/kernel/0095-x86-entry-64-Use-POP-instead-of-MOV-to-restore-regs-.patch b/patches/kernel/0095-x86-entry-64-Use-POP-instead-of-MOV-to-restore-regs-.patch

deleted file mode 100644 (file)

index d93f334..0000000
--- a/patches/kernel/0095-x86-entry-64-Use-POP-instead-of-MOV-to-restore-regs-.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:05 -0700
-Subject: [PATCH] x86/entry/64: Use POP instead of MOV to restore regs on NMI
- return
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This gets rid of the last user of the old RESTORE_..._REGS infrastructure.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/652a260f17a160789bc6a41d997f98249b73e2ab.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 471ee4832209e986029b9fabdaad57b1eecb856b)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 3c5771a43d8f00e53081871027fea891a091ff5e)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S | 11 +++++++----
- 1 file changed, 7 insertions(+), 4 deletions(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 87be1cd1fa88..4eff3aca54ed 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -1572,11 +1572,14 @@ end_repeat_nmi:
- nmi_swapgs:
-       SWAPGS_UNSAFE_STACK
- nmi_restore:
--      RESTORE_EXTRA_REGS
--      RESTORE_C_REGS
-+      POP_EXTRA_REGS
-+      POP_C_REGS
- 
--      /* Point RSP at the "iret" frame. */
--      REMOVE_PT_GPREGS_FROM_STACK 6*8
-+      /*
-+       * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
-+       * at the "iret" frame.
-+       */
-+      addq    $6*8, %rsp
- 
-       /*
-        * Clear "NMI executing".  Set DF first so that we can easily
--- 
-2.14.2
-
diff --git a/patches/kernel/0096-x86-entry-64-Remove-the-RESTORE_._REGS-infrastructur.patch b/patches/kernel/0096-x86-entry-64-Remove-the-RESTORE_._REGS-infrastructur.patch

deleted file mode 100644 (file)

index 0c1434a..0000000
--- a/patches/kernel/0096-x86-entry-64-Remove-the-RESTORE_._REGS-infrastructur.patch
+++ /dev/null
@@ -1,104 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:06 -0700
-Subject: [PATCH] x86/entry/64: Remove the RESTORE_..._REGS infrastructure
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-All users of RESTORE_EXTRA_REGS, RESTORE_C_REGS and such, and
-REMOVE_PT_GPREGS_FROM_STACK are gone.  Delete the macros.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/c32672f6e47c561893316d48e06c7656b1039a36.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit c39858de696f0cc160a544455e8403d663d577e9)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d248c62028c5467cd5a5ce06d344e3fb330da3ec)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/calling.h | 52 ------------------------------------------------
- 1 file changed, 52 deletions(-)
-
-diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
-index 0b9dd8123701..1895a685d3dd 100644
---- a/arch/x86/entry/calling.h
-+++ b/arch/x86/entry/calling.h
-@@ -141,16 +141,6 @@ For 32-bit we have the following conventions - kernel is built with
-       UNWIND_HINT_REGS offset=\offset
-       .endm
- 
--      .macro RESTORE_EXTRA_REGS offset=0
--      movq 0*8+\offset(%rsp), %r15
--      movq 1*8+\offset(%rsp), %r14
--      movq 2*8+\offset(%rsp), %r13
--      movq 3*8+\offset(%rsp), %r12
--      movq 4*8+\offset(%rsp), %rbp
--      movq 5*8+\offset(%rsp), %rbx
--      UNWIND_HINT_REGS offset=\offset extra=0
--      .endm
--
-       .macro POP_EXTRA_REGS
-       popq %r15
-       popq %r14
-@@ -172,48 +162,6 @@ For 32-bit we have the following conventions - kernel is built with
-       popq %rdi
-       .endm
- 
--      .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
--      .if \rstor_r11
--      movq 6*8(%rsp), %r11
--      .endif
--      .if \rstor_r8910
--      movq 7*8(%rsp), %r10
--      movq 8*8(%rsp), %r9
--      movq 9*8(%rsp), %r8
--      .endif
--      .if \rstor_rax
--      movq 10*8(%rsp), %rax
--      .endif
--      .if \rstor_rcx
--      movq 11*8(%rsp), %rcx
--      .endif
--      .if \rstor_rdx
--      movq 12*8(%rsp), %rdx
--      .endif
--      movq 13*8(%rsp), %rsi
--      movq 14*8(%rsp), %rdi
--      UNWIND_HINT_IRET_REGS offset=16*8
--      .endm
--      .macro RESTORE_C_REGS
--      RESTORE_C_REGS_HELPER 1,1,1,1,1
--      .endm
--      .macro RESTORE_C_REGS_EXCEPT_RAX
--      RESTORE_C_REGS_HELPER 0,1,1,1,1
--      .endm
--      .macro RESTORE_C_REGS_EXCEPT_RCX
--      RESTORE_C_REGS_HELPER 1,0,1,1,1
--      .endm
--      .macro RESTORE_C_REGS_EXCEPT_R11
--      RESTORE_C_REGS_HELPER 1,1,0,1,1
--      .endm
--      .macro RESTORE_C_REGS_EXCEPT_RCX_R11
--      RESTORE_C_REGS_HELPER 1,0,0,1,1
--      .endm
--
--      .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
--      subq $-(15*8+\addskip), %rsp
--      .endm
--
-       .macro icebp
-       .byte 0xf1
-       .endm
--- 
-2.14.2
-
diff --git a/patches/kernel/0096-x86-entry-64-Use-POP-instead-of-MOV-to-restore-regs-.patch b/patches/kernel/0096-x86-entry-64-Use-POP-instead-of-MOV-to-restore-regs-.patch

new file mode 100644 (file)

index 0000000..d93f334
--- /dev/null
+++ b/patches/kernel/0096-x86-entry-64-Use-POP-instead-of-MOV-to-restore-regs-.patch
@@ -0,0 +1,57 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:05 -0700
+Subject: [PATCH] x86/entry/64: Use POP instead of MOV to restore regs on NMI
+ return
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This gets rid of the last user of the old RESTORE_..._REGS infrastructure.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/652a260f17a160789bc6a41d997f98249b73e2ab.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 471ee4832209e986029b9fabdaad57b1eecb856b)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 3c5771a43d8f00e53081871027fea891a091ff5e)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 87be1cd1fa88..4eff3aca54ed 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1572,11 +1572,14 @@ end_repeat_nmi:
+ nmi_swapgs:
+       SWAPGS_UNSAFE_STACK
+ nmi_restore:
+-      RESTORE_EXTRA_REGS
+-      RESTORE_C_REGS
++      POP_EXTRA_REGS
++      POP_C_REGS
+ 
+-      /* Point RSP at the "iret" frame. */
+-      REMOVE_PT_GPREGS_FROM_STACK 6*8
++      /*
++       * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
++       * frame.
++       */
++      addq    $6*8, %rsp
+ 
+       /*
+        * Clear "NMI executing".  Set DF first so that we can easily
+-- 
+2.14.2
+
diff --git a/patches/kernel/0097-x86-entry-64-Remove-the-RESTORE_._REGS-infrastructur.patch b/patches/kernel/0097-x86-entry-64-Remove-the-RESTORE_._REGS-infrastructur.patch

new file mode 100644 (file)

index 0000000..0c1434a
--- /dev/null
+++ b/patches/kernel/0097-x86-entry-64-Remove-the-RESTORE_._REGS-infrastructur.patch
@@ -0,0 +1,104 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:06 -0700
+Subject: [PATCH] x86/entry/64: Remove the RESTORE_..._REGS infrastructure
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+All users of RESTORE_EXTRA_REGS, RESTORE_C_REGS and such, and
+REMOVE_PT_GPREGS_FROM_STACK are gone.  Delete the macros.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/c32672f6e47c561893316d48e06c7656b1039a36.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit c39858de696f0cc160a544455e8403d663d577e9)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d248c62028c5467cd5a5ce06d344e3fb330da3ec)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/calling.h | 52 ------------------------------------------------
+ 1 file changed, 52 deletions(-)
+
+diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
+index 0b9dd8123701..1895a685d3dd 100644
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -141,16 +141,6 @@ For 32-bit we have the following conventions - kernel is built with
+       UNWIND_HINT_REGS offset=\offset
+       .endm
+ 
+-      .macro RESTORE_EXTRA_REGS offset=0
+-      movq 0*8+\offset(%rsp), %r15
+-      movq 1*8+\offset(%rsp), %r14
+-      movq 2*8+\offset(%rsp), %r13
+-      movq 3*8+\offset(%rsp), %r12
+-      movq 4*8+\offset(%rsp), %rbp
+-      movq 5*8+\offset(%rsp), %rbx
+-      UNWIND_HINT_REGS offset=\offset extra=0
+-      .endm
+-
+       .macro POP_EXTRA_REGS
+       popq %r15
+       popq %r14
+@@ -172,48 +162,6 @@ For 32-bit we have the following conventions - kernel is built with
+       popq %rdi
+       .endm
+ 
+-      .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
+-      .if \rstor_r11
+-      movq 6*8(%rsp), %r11
+-      .endif
+-      .if \rstor_r8910
+-      movq 7*8(%rsp), %r10
+-      movq 8*8(%rsp), %r9
+-      movq 9*8(%rsp), %r8
+-      .endif
+-      .if \rstor_rax
+-      movq 10*8(%rsp), %rax
+-      .endif
+-      .if \rstor_rcx
+-      movq 11*8(%rsp), %rcx
+-      .endif
+-      .if \rstor_rdx
+-      movq 12*8(%rsp), %rdx
+-      .endif
+-      movq 13*8(%rsp), %rsi
+-      movq 14*8(%rsp), %rdi
+-      UNWIND_HINT_IRET_REGS offset=16*8
+-      .endm
+-      .macro RESTORE_C_REGS
+-      RESTORE_C_REGS_HELPER 1,1,1,1,1
+-      .endm
+-      .macro RESTORE_C_REGS_EXCEPT_RAX
+-      RESTORE_C_REGS_HELPER 0,1,1,1,1
+-      .endm
+-      .macro RESTORE_C_REGS_EXCEPT_RCX
+-      RESTORE_C_REGS_HELPER 1,0,1,1,1
+-      .endm
+-      .macro RESTORE_C_REGS_EXCEPT_R11
+-      RESTORE_C_REGS_HELPER 1,1,0,1,1
+-      .endm
+-      .macro RESTORE_C_REGS_EXCEPT_RCX_R11
+-      RESTORE_C_REGS_HELPER 1,0,0,1,1
+-      .endm
+-
+-      .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
+-      subq $-(15*8+\addskip), %rsp
+-      .endm
+-
+       .macro icebp
+       .byte 0xf1
+       .endm
+-- 
+2.14.2
+
diff --git a/patches/kernel/0097-xen-x86-entry-64-Add-xen-NMI-trap-entry.patch b/patches/kernel/0097-xen-x86-entry-64-Add-xen-NMI-trap-entry.patch

deleted file mode 100644 (file)

index d0f7148..0000000
--- a/patches/kernel/0097-xen-x86-entry-64-Add-xen-NMI-trap-entry.patch
+++ /dev/null
@@ -1,105 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Juergen Gross <jgross@suse.com>
-Date: Thu, 2 Nov 2017 00:59:07 -0700
-Subject: [PATCH] xen, x86/entry/64: Add xen NMI trap entry
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Instead of trying to execute any NMI via the bare metal's NMI trap
-handler use a Xen specific one for PV domains, like we do for e.g.
-debug traps. As in a PV domain the NMI is handled via the normal
-kernel stack this is the correct thing to do.
-
-This will enable us to get rid of the very fragile and questionable
-dependencies between the bare metal NMI handler and Xen assumptions
-believed to be broken anyway.
-
-Signed-off-by: Juergen Gross <jgross@suse.com>
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/5baf5c0528d58402441550c5770b98e7961e7680.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 43e4111086a70c78bedb6ad990bee97f17b27a6e)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 20c970e03b42141abf6c45938ce6d4fdc3555921)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/traps.h | 2 +-
- arch/x86/xen/enlighten_pv.c  | 2 +-
- arch/x86/entry/entry_64.S    | 2 +-
- arch/x86/xen/xen-asm_64.S    | 2 +-
- 4 files changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
-index 8e5bf86f87e5..b052a7621ca1 100644
---- a/arch/x86/include/asm/traps.h
-+++ b/arch/x86/include/asm/traps.h
-@@ -55,9 +55,9 @@ asmlinkage void simd_coprocessor_error(void);
- 
- #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
- asmlinkage void xen_divide_error(void);
-+asmlinkage void xen_xennmi(void);
- asmlinkage void xen_xendebug(void);
- asmlinkage void xen_xenint3(void);
--asmlinkage void xen_nmi(void);
- asmlinkage void xen_overflow(void);
- asmlinkage void xen_bounds(void);
- asmlinkage void xen_invalid_op(void);
-diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
-index 69b9deff7e5c..8da4eff19c2a 100644
---- a/arch/x86/xen/enlighten_pv.c
-+++ b/arch/x86/xen/enlighten_pv.c
-@@ -600,7 +600,7 @@ static struct trap_array_entry trap_array[] = {
- #ifdef CONFIG_X86_MCE
-       { machine_check,               xen_machine_check,               true },
- #endif
--      { nmi,                         xen_nmi,                         true },
-+      { nmi,                         xen_xennmi,                      true },
-       { overflow,                    xen_overflow,                    false },
- #ifdef CONFIG_IA32_EMULATION
-       { entry_INT80_compat,          xen_entry_INT80_compat,          false },
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 4eff3aca54ed..5a6aba7cf3bd 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -1091,6 +1091,7 @@ idtentry int3                    do_int3                 has_error_code=0        paranoid=1 shift_ist=DEBUG_STACK
- idtentry stack_segment                do_stack_segment        has_error_code=1
- 
- #ifdef CONFIG_XEN
-+idtentry xennmi                       do_nmi                  has_error_code=0
- idtentry xendebug             do_debug                has_error_code=0
- idtentry xenint3              do_int3                 has_error_code=0
- #endif
-@@ -1253,7 +1254,6 @@ ENTRY(error_exit)
- END(error_exit)
- 
- /* Runs on exception stack */
--/* XXX: broken on Xen PV */
- ENTRY(nmi)
-       UNWIND_HINT_IRET_REGS
-       /*
-diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
-index dae2cc33afb5..286ecc198562 100644
---- a/arch/x86/xen/xen-asm_64.S
-+++ b/arch/x86/xen/xen-asm_64.S
-@@ -29,7 +29,7 @@ xen_pv_trap debug
- xen_pv_trap xendebug
- xen_pv_trap int3
- xen_pv_trap xenint3
--xen_pv_trap nmi
-+xen_pv_trap xennmi
- xen_pv_trap overflow
- xen_pv_trap bounds
- xen_pv_trap invalid_op
--- 
-2.14.2
-
diff --git a/patches/kernel/0098-x86-entry-64-De-Xen-ify-our-NMI-code.patch b/patches/kernel/0098-x86-entry-64-De-Xen-ify-our-NMI-code.patch

deleted file mode 100644 (file)

index 0a2d534..0000000
--- a/patches/kernel/0098-x86-entry-64-De-Xen-ify-our-NMI-code.patch
+++ /dev/null
@@ -1,117 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:08 -0700
-Subject: [PATCH] x86/entry/64: De-Xen-ify our NMI code
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Xen PV is fundamentally incompatible with our fancy NMI code: it
-doesn't use IST at all, and Xen entries clobber two stack slots
-below the hardware frame.
-
-Drop Xen PV support from our NMI code entirely.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Acked-by: Juergen Gross <jgross@suse.com>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/bfbe711b5ae03f672f8848999a8eb2711efc7f98.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 929bacec21478a72c78e4f29f98fb799bd00105a)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit ffc372909c1701c4fdd2bde7861692573ef381a7)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S | 30 ++++++++++++++++++------------
- 1 file changed, 18 insertions(+), 12 deletions(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 5a6aba7cf3bd..05501c781c20 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -1253,9 +1253,13 @@ ENTRY(error_exit)
-       jmp     retint_user
- END(error_exit)
- 
--/* Runs on exception stack */
-+/*
-+ * Runs on exception stack.  Xen PV does not go through this path at all,
-+ * so we can use real assembly here.
-+ */
- ENTRY(nmi)
-       UNWIND_HINT_IRET_REGS
-+
-       /*
-        * We allow breakpoints in NMIs. If a breakpoint occurs, then
-        * the iretq it performs will take us out of NMI context.
-@@ -1313,7 +1317,7 @@ ENTRY(nmi)
-        * stacks lest we corrupt the "NMI executing" variable.
-        */
- 
--      SWAPGS_UNSAFE_STACK
-+      swapgs
-       cld
-       movq    %rsp, %rdx
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-@@ -1478,7 +1482,7 @@ nested_nmi_out:
-       popq    %rdx
- 
-       /* We are returning to kernel mode, so this cannot result in a fault. */
--      INTERRUPT_RETURN
-+      iretq
- 
- first_nmi:
-       /* Restore rdx. */
-@@ -1509,7 +1513,7 @@ first_nmi:
-       pushfq                  /* RFLAGS */
-       pushq   $__KERNEL_CS    /* CS */
-       pushq   $1f             /* RIP */
--      INTERRUPT_RETURN        /* continues at repeat_nmi below */
-+      iretq                   /* continues at repeat_nmi below */
-       UNWIND_HINT_IRET_REGS
- 1:
- #endif
-@@ -1584,20 +1588,22 @@ nmi_restore:
-       /*
-        * Clear "NMI executing".  Set DF first so that we can easily
-        * distinguish the remaining code between here and IRET from
--       * the SYSCALL entry and exit paths.  On a native kernel, we
--       * could just inspect RIP, but, on paravirt kernels,
--       * INTERRUPT_RETURN can translate into a jump into a
--       * hypercall page.
-+       * the SYSCALL entry and exit paths.
-+       *
-+       * We arguably should just inspect RIP instead, but I (Andy) wrote
-+       * this code when I had the misapprehension that Xen PV supported
-+       * NMIs, and Xen PV would break that approach.
-        */
-       std
-       movq    $0, 5*8(%rsp)           /* clear "NMI executing" */
- 
-       /*
--       * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
--       * stack in a single instruction.  We are returning to kernel
--       * mode, so this cannot result in a fault.
-+       * iretq reads the "iret" frame and exits the NMI stack in a
-+       * single instruction.  We are returning to kernel mode, so this
-+       * cannot result in a fault.  Similarly, we don't need to worry
-+       * about espfix64 on the way back to kernel mode.
-        */
--      INTERRUPT_RETURN
-+      iretq
- END(nmi)
- 
- ENTRY(ignore_sysret)
--- 
-2.14.2
-
diff --git a/patches/kernel/0098-xen-x86-entry-64-Add-xen-NMI-trap-entry.patch b/patches/kernel/0098-xen-x86-entry-64-Add-xen-NMI-trap-entry.patch

new file mode 100644 (file)

index 0000000..d0f7148
--- /dev/null
+++ b/patches/kernel/0098-xen-x86-entry-64-Add-xen-NMI-trap-entry.patch
@@ -0,0 +1,105 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 2 Nov 2017 00:59:07 -0700
+Subject: [PATCH] xen, x86/entry/64: Add xen NMI trap entry
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Instead of trying to execute any NMI via the bare metal's NMI trap
+handler, use a Xen-specific one for PV domains, like we do for e.g.
+debug traps. As the NMI is handled via the normal kernel stack in a
+PV domain, this is the correct thing to do.
+
+This will enable us to get rid of the very fragile and questionable
+dependencies between the bare metal NMI handler and Xen assumptions
+believed to be broken anyway.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/5baf5c0528d58402441550c5770b98e7961e7680.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 43e4111086a70c78bedb6ad990bee97f17b27a6e)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 20c970e03b42141abf6c45938ce6d4fdc3555921)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/traps.h | 2 +-
+ arch/x86/xen/enlighten_pv.c  | 2 +-
+ arch/x86/entry/entry_64.S    | 2 +-
+ arch/x86/xen/xen-asm_64.S    | 2 +-
+ 4 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
+index 8e5bf86f87e5..b052a7621ca1 100644
+--- a/arch/x86/include/asm/traps.h
++++ b/arch/x86/include/asm/traps.h
+@@ -55,9 +55,9 @@ asmlinkage void simd_coprocessor_error(void);
+ 
+ #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+ asmlinkage void xen_divide_error(void);
++asmlinkage void xen_xennmi(void);
+ asmlinkage void xen_xendebug(void);
+ asmlinkage void xen_xenint3(void);
+-asmlinkage void xen_nmi(void);
+ asmlinkage void xen_overflow(void);
+ asmlinkage void xen_bounds(void);
+ asmlinkage void xen_invalid_op(void);
+diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
+index 69b9deff7e5c..8da4eff19c2a 100644
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -600,7 +600,7 @@ static struct trap_array_entry trap_array[] = {
+ #ifdef CONFIG_X86_MCE
+       { machine_check,               xen_machine_check,               true },
+ #endif
+-      { nmi,                         xen_nmi,                         true },
++      { nmi,                         xen_xennmi,                      true },
+       { overflow,                    xen_overflow,                    false },
+ #ifdef CONFIG_IA32_EMULATION
+       { entry_INT80_compat,          xen_entry_INT80_compat,          false },
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 4eff3aca54ed..5a6aba7cf3bd 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1091,6 +1091,7 @@ idtentry int3                    do_int3                 has_error_code=0        paranoid=1 shift_ist=DEBUG_STACK
+ idtentry stack_segment                do_stack_segment        has_error_code=1
+ 
+ #ifdef CONFIG_XEN
++idtentry xennmi                       do_nmi                  has_error_code=0
+ idtentry xendebug             do_debug                has_error_code=0
+ idtentry xenint3              do_int3                 has_error_code=0
+ #endif
+@@ -1253,7 +1254,6 @@ ENTRY(error_exit)
+ END(error_exit)
+ 
+ /* Runs on exception stack */
+-/* XXX: broken on Xen PV */
+ ENTRY(nmi)
+       UNWIND_HINT_IRET_REGS
+       /*
+diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
+index dae2cc33afb5..286ecc198562 100644
+--- a/arch/x86/xen/xen-asm_64.S
++++ b/arch/x86/xen/xen-asm_64.S
+@@ -29,7 +29,7 @@ xen_pv_trap debug
+ xen_pv_trap xendebug
+ xen_pv_trap int3
+ xen_pv_trap xenint3
+-xen_pv_trap nmi
++xen_pv_trap xennmi
+ xen_pv_trap overflow
+ xen_pv_trap bounds
+ xen_pv_trap invalid_op
+-- 
+2.14.2
+
diff --git a/patches/kernel/0099-x86-entry-32-Pull-the-MSR_IA32_SYSENTER_CS-update-co.patch b/patches/kernel/0099-x86-entry-32-Pull-the-MSR_IA32_SYSENTER_CS-update-co.patch

deleted file mode 100644 (file)

index 05daea1..0000000
--- a/patches/kernel/0099-x86-entry-32-Pull-the-MSR_IA32_SYSENTER_CS-update-co.patch
+++ /dev/null
@@ -1,145 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:09 -0700
-Subject: [PATCH] x86/entry/32: Pull the MSR_IA32_SYSENTER_CS update code out
- of native_load_sp0()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This causes the MSR_IA32_SYSENTER_CS write to move out of the
-paravirt callback.  This shouldn't affect Xen PV: Xen already ignores
-MSR_IA32_SYSENTER_ESP writes.  In any event, Xen doesn't support
-vm86() in a useful way.
-
-Note to any potential backporters: This patch won't break lguest, as
-lguest didn't have any SYSENTER support at all.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/75cf09fe03ae778532d0ca6c65aa58e66bc2f90c.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit bd7dc5a6afac719d8ce4092391eef2c7e83c2a75)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 779e32d0da9a547f3b11fbecac8287e458ba67f5)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/processor.h |  7 -------
- arch/x86/include/asm/switch_to.h | 12 ++++++++++++
- arch/x86/kernel/process_32.c     |  4 +++-
- arch/x86/kernel/process_64.c     |  2 +-
- arch/x86/kernel/vm86_32.c        |  6 +++++-
- 5 files changed, 21 insertions(+), 10 deletions(-)
-
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index 028245e1c42b..ee37fb86900a 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -513,13 +513,6 @@ static inline void
- native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
- {
-       tss->x86_tss.sp0 = thread->sp0;
--#ifdef CONFIG_X86_32
--      /* Only happens when SEP is enabled, no need to test "SEP"arately: */
--      if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
--              tss->x86_tss.ss1 = thread->sysenter_cs;
--              wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
--      }
--#endif
- }
- 
- static inline void native_swapgs(void)
-diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
-index fcc5cd387fd1..7ae8caffbada 100644
---- a/arch/x86/include/asm/switch_to.h
-+++ b/arch/x86/include/asm/switch_to.h
-@@ -72,4 +72,16 @@ do {                                                                        \
-       ((last) = __switch_to_asm((prev), (next)));                     \
- } while (0)
- 
-+#ifdef CONFIG_X86_32
-+static inline void refresh_sysenter_cs(struct thread_struct *thread)
-+{
-+      /* Only happens when SEP is enabled, no need to test "SEP"arately: */
-+      if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
-+              return;
-+
-+      this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
-+      wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
-+}
-+#endif
-+
- #endif /* _ASM_X86_SWITCH_TO_H */
-diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
-index 22802162eeb9..2e42b66b8ca4 100644
---- a/arch/x86/kernel/process_32.c
-+++ b/arch/x86/kernel/process_32.c
-@@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
- 
-       /*
-        * Reload esp0 and cpu_current_top_of_stack.  This changes
--       * current_thread_info().
-+       * current_thread_info().  Refresh the SYSENTER configuration in
-+       * case prev or next is vm86.
-        */
-       load_sp0(tss, next);
-+      refresh_sysenter_cs(next);
-       this_cpu_write(cpu_current_top_of_stack,
-                      (unsigned long)task_stack_page(next_p) +
-                      THREAD_SIZE);
-diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
-index 1e7701c4cd80..565daaa6f18d 100644
---- a/arch/x86/kernel/process_64.c
-+++ b/arch/x86/kernel/process_64.c
-@@ -465,7 +465,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
-        */
-       this_cpu_write(current_task, next_p);
- 
--      /* Reload esp0 and ss1.  This changes current_thread_info(). */
-+      /* Reload sp0. */
-       load_sp0(tss, next);
- 
-       /*
-diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
-index 7924a5356c8a..5bc1c3ab6287 100644
---- a/arch/x86/kernel/vm86_32.c
-+++ b/arch/x86/kernel/vm86_32.c
-@@ -54,6 +54,7 @@
- #include <asm/irq.h>
- #include <asm/traps.h>
- #include <asm/vm86.h>
-+#include <asm/switch_to.h>
- 
- /*
-  * Known problems:
-@@ -149,6 +150,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
-       tsk->thread.sp0 = vm86->saved_sp0;
-       tsk->thread.sysenter_cs = __KERNEL_CS;
-       load_sp0(tss, &tsk->thread);
-+      refresh_sysenter_cs(&tsk->thread);
-       vm86->saved_sp0 = 0;
-       put_cpu();
- 
-@@ -368,8 +370,10 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
-       /* make room for real-mode segments */
-       tsk->thread.sp0 += 16;
- 
--      if (static_cpu_has(X86_FEATURE_SEP))
-+      if (static_cpu_has(X86_FEATURE_SEP)) {
-               tsk->thread.sysenter_cs = 0;
-+              refresh_sysenter_cs(&tsk->thread);
-+      }
- 
-       load_sp0(tss, &tsk->thread);
-       put_cpu();
--- 
-2.14.2
-
diff --git a/patches/kernel/0099-x86-entry-64-De-Xen-ify-our-NMI-code.patch b/patches/kernel/0099-x86-entry-64-De-Xen-ify-our-NMI-code.patch

new file mode 100644 (file)

index 0000000..0a2d534
--- /dev/null
+++ b/patches/kernel/0099-x86-entry-64-De-Xen-ify-our-NMI-code.patch
@@ -0,0 +1,117 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:08 -0700
+Subject: [PATCH] x86/entry/64: De-Xen-ify our NMI code
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Xen PV is fundamentally incompatible with our fancy NMI code: it
+doesn't use IST at all, and Xen entries clobber two stack slots
+below the hardware frame.
+
+Drop Xen PV support from our NMI code entirely.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Juergen Gross <jgross@suse.com>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/bfbe711b5ae03f672f8848999a8eb2711efc7f98.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 929bacec21478a72c78e4f29f98fb799bd00105a)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit ffc372909c1701c4fdd2bde7861692573ef381a7)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S | 30 ++++++++++++++++++------------
+ 1 file changed, 18 insertions(+), 12 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 5a6aba7cf3bd..05501c781c20 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1253,9 +1253,13 @@ ENTRY(error_exit)
+       jmp     retint_user
+ END(error_exit)
+ 
+-/* Runs on exception stack */
++/*
++ * Runs on exception stack.  Xen PV does not go through this path at all,
++ * so we can use real assembly here.
++ */
+ ENTRY(nmi)
+       UNWIND_HINT_IRET_REGS
++
+       /*
+        * We allow breakpoints in NMIs. If a breakpoint occurs, then
+        * the iretq it performs will take us out of NMI context.
+@@ -1313,7 +1317,7 @@ ENTRY(nmi)
+        * stacks lest we corrupt the "NMI executing" variable.
+        */
+ 
+-      SWAPGS_UNSAFE_STACK
++      swapgs
+       cld
+       movq    %rsp, %rdx
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+@@ -1478,7 +1482,7 @@ nested_nmi_out:
+       popq    %rdx
+ 
+       /* We are returning to kernel mode, so this cannot result in a fault. */
+-      INTERRUPT_RETURN
++      iretq
+ 
+ first_nmi:
+       /* Restore rdx. */
+@@ -1509,7 +1513,7 @@ first_nmi:
+       pushfq                  /* RFLAGS */
+       pushq   $__KERNEL_CS    /* CS */
+       pushq   $1f             /* RIP */
+-      INTERRUPT_RETURN        /* continues at repeat_nmi below */
++      iretq                   /* continues at repeat_nmi below */
+       UNWIND_HINT_IRET_REGS
+ 1:
+ #endif
+@@ -1584,20 +1588,22 @@ nmi_restore:
+       /*
+        * Clear "NMI executing".  Set DF first so that we can easily
+        * distinguish the remaining code between here and IRET from
+-       * the SYSCALL entry and exit paths.  On a native kernel, we
+-       * could just inspect RIP, but, on paravirt kernels,
+-       * INTERRUPT_RETURN can translate into a jump into a
+-       * hypercall page.
++       * the SYSCALL entry and exit paths.
++       *
++       * We arguably should just inspect RIP instead, but I (Andy) wrote
++       * this code when I had the misapprehension that Xen PV supported
++       * NMIs, and Xen PV would break that approach.
+        */
+       std
+       movq    $0, 5*8(%rsp)           /* clear "NMI executing" */
+ 
+       /*
+-       * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
+-       * stack in a single instruction.  We are returning to kernel
+-       * mode, so this cannot result in a fault.
++       * iretq reads the "iret" frame and exits the NMI stack in a
++       * single instruction.  We are returning to kernel mode, so this
++       * cannot result in a fault.  Similarly, we don't need to worry
++       * about espfix64 on the way back to kernel mode.
+        */
+-      INTERRUPT_RETURN
++      iretq
+ END(nmi)
+ 
+ ENTRY(ignore_sysret)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0100-x86-entry-32-Pull-the-MSR_IA32_SYSENTER_CS-update-co.patch b/patches/kernel/0100-x86-entry-32-Pull-the-MSR_IA32_SYSENTER_CS-update-co.patch

new file mode 100644 (file)

index 0000000..05daea1
--- /dev/null
+++ b/patches/kernel/0100-x86-entry-32-Pull-the-MSR_IA32_SYSENTER_CS-update-co.patch
@@ -0,0 +1,145 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:09 -0700
+Subject: [PATCH] x86/entry/32: Pull the MSR_IA32_SYSENTER_CS update code out
+ of native_load_sp0()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This causes the MSR_IA32_SYSENTER_CS write to move out of the
+paravirt callback.  This shouldn't affect Xen PV: Xen already ignores
+MSR_IA32_SYSENTER_ESP writes.  In any event, Xen doesn't support
+vm86() in a useful way.
+
+Note to any potential backporters: This patch won't break lguest, as
+lguest didn't have any SYSENTER support at all.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/75cf09fe03ae778532d0ca6c65aa58e66bc2f90c.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit bd7dc5a6afac719d8ce4092391eef2c7e83c2a75)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 779e32d0da9a547f3b11fbecac8287e458ba67f5)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/processor.h |  7 -------
+ arch/x86/include/asm/switch_to.h | 12 ++++++++++++
+ arch/x86/kernel/process_32.c     |  4 +++-
+ arch/x86/kernel/process_64.c     |  2 +-
+ arch/x86/kernel/vm86_32.c        |  6 +++++-
+ 5 files changed, 21 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 028245e1c42b..ee37fb86900a 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -513,13 +513,6 @@ static inline void
+ native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
+ {
+       tss->x86_tss.sp0 = thread->sp0;
+-#ifdef CONFIG_X86_32
+-      /* Only happens when SEP is enabled, no need to test "SEP"arately: */
+-      if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
+-              tss->x86_tss.ss1 = thread->sysenter_cs;
+-              wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+-      }
+-#endif
+ }
+ 
+ static inline void native_swapgs(void)
+diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
+index fcc5cd387fd1..7ae8caffbada 100644
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -72,4 +72,16 @@ do {                                                                        \
+       ((last) = __switch_to_asm((prev), (next)));                     \
+ } while (0)
+ 
++#ifdef CONFIG_X86_32
++static inline void refresh_sysenter_cs(struct thread_struct *thread)
++{
++      /* Only happens when SEP is enabled, no need to test "SEP"arately: */
++      if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
++              return;
++
++      this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
++      wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
++}
++#endif
++
+ #endif /* _ASM_X86_SWITCH_TO_H */
+diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
+index 22802162eeb9..2e42b66b8ca4 100644
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ 
+       /*
+        * Reload esp0 and cpu_current_top_of_stack.  This changes
+-       * current_thread_info().
++       * current_thread_info().  Refresh the SYSENTER configuration in
++       * case prev or next is vm86.
+        */
+       load_sp0(tss, next);
++      refresh_sysenter_cs(next);
+       this_cpu_write(cpu_current_top_of_stack,
+                      (unsigned long)task_stack_page(next_p) +
+                      THREAD_SIZE);
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index 1e7701c4cd80..565daaa6f18d 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -465,7 +465,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+        */
+       this_cpu_write(current_task, next_p);
+ 
+-      /* Reload esp0 and ss1.  This changes current_thread_info(). */
++      /* Reload sp0. */
+       load_sp0(tss, next);
+ 
+       /*
+diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
+index 7924a5356c8a..5bc1c3ab6287 100644
+--- a/arch/x86/kernel/vm86_32.c
++++ b/arch/x86/kernel/vm86_32.c
+@@ -54,6 +54,7 @@
+ #include <asm/irq.h>
+ #include <asm/traps.h>
+ #include <asm/vm86.h>
++#include <asm/switch_to.h>
+ 
+ /*
+  * Known problems:
+@@ -149,6 +150,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
+       tsk->thread.sp0 = vm86->saved_sp0;
+       tsk->thread.sysenter_cs = __KERNEL_CS;
+       load_sp0(tss, &tsk->thread);
++      refresh_sysenter_cs(&tsk->thread);
+       vm86->saved_sp0 = 0;
+       put_cpu();
+ 
+@@ -368,8 +370,10 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
+       /* make room for real-mode segments */
+       tsk->thread.sp0 += 16;
+ 
+-      if (static_cpu_has(X86_FEATURE_SEP))
++      if (static_cpu_has(X86_FEATURE_SEP)) {
+               tsk->thread.sysenter_cs = 0;
++              refresh_sysenter_cs(&tsk->thread);
++      }
+ 
+       load_sp0(tss, &tsk->thread);
+       put_cpu();
+-- 
+2.14.2
+
diff --git a/patches/kernel/0100-x86-entry-64-Pass-SP0-directly-to-load_sp0.patch b/patches/kernel/0100-x86-entry-64-Pass-SP0-directly-to-load_sp0.patch

deleted file mode 100644 (file)

index 4b94cba..0000000
--- a/patches/kernel/0100-x86-entry-64-Pass-SP0-directly-to-load_sp0.patch
+++ /dev/null
@@ -1,238 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:10 -0700
-Subject: [PATCH] x86/entry/64: Pass SP0 directly to load_sp0()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-load_sp0() had an odd signature:
-
-  void load_sp0(struct tss_struct *tss, struct thread_struct *thread);
-
-Simplify it to:
-
-  void load_sp0(unsigned long sp0);
-
-Also simplify a few get_cpu()/put_cpu() sequences to
-preempt_disable()/preempt_enable().
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/2655d8b42ed940aa384fe18ee1129bbbcf730a08.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit da51da189a24bb9b7e2d5a123be096e51a4695a5)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 41f6a89b0be4d052a6af59df5e56102d4e4c79ef)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/paravirt.h       |  5 ++---
- arch/x86/include/asm/paravirt_types.h |  2 +-
- arch/x86/include/asm/processor.h      |  9 ++++-----
- arch/x86/kernel/cpu/common.c          |  4 ++--
- arch/x86/kernel/process_32.c          |  2 +-
- arch/x86/kernel/process_64.c          |  2 +-
- arch/x86/kernel/vm86_32.c             | 14 ++++++--------
- arch/x86/xen/enlighten_pv.c           |  7 +++----
- 8 files changed, 20 insertions(+), 25 deletions(-)
-
-diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
-index 12deec722cf0..43d4f90edebc 100644
---- a/arch/x86/include/asm/paravirt.h
-+++ b/arch/x86/include/asm/paravirt.h
-@@ -15,10 +15,9 @@
- #include <linux/cpumask.h>
- #include <asm/frame.h>
- 
--static inline void load_sp0(struct tss_struct *tss,
--                           struct thread_struct *thread)
-+static inline void load_sp0(unsigned long sp0)
- {
--      PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
-+      PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
- }
- 
- /* The paravirtualized CPUID instruction. */
-diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
-index 42873edd9f9d..e3953a1e2b57 100644
---- a/arch/x86/include/asm/paravirt_types.h
-+++ b/arch/x86/include/asm/paravirt_types.h
-@@ -133,7 +133,7 @@ struct pv_cpu_ops {
-       void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
-       void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
- 
--      void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
-+      void (*load_sp0)(unsigned long sp0);
- 
-       void (*set_iopl_mask)(unsigned mask);
- 
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index ee37fb86900a..85ddfc1a9bb5 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -510,9 +510,9 @@ static inline void native_set_iopl_mask(unsigned mask)
- }
- 
- static inline void
--native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
-+native_load_sp0(unsigned long sp0)
- {
--      tss->x86_tss.sp0 = thread->sp0;
-+      this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
- }
- 
- static inline void native_swapgs(void)
-@@ -537,10 +537,9 @@ static inline unsigned long current_top_of_stack(void)
- #else
- #define __cpuid                       native_cpuid
- 
--static inline void load_sp0(struct tss_struct *tss,
--                          struct thread_struct *thread)
-+static inline void load_sp0(unsigned long sp0)
- {
--      native_load_sp0(tss, thread);
-+      native_load_sp0(sp0);
- }
- 
- #define set_iopl_mask native_set_iopl_mask
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index ef7b1ba56363..6562acbfc4e0 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -1570,7 +1570,7 @@ void cpu_init(void)
-       BUG_ON(me->mm);
-       enter_lazy_tlb(&init_mm, me);
- 
--      load_sp0(t, &current->thread);
-+      load_sp0(current->thread.sp0);
-       set_tss_desc(cpu, t);
-       load_TR_desc();
-       load_mm_ldt(&init_mm);
-@@ -1624,7 +1624,7 @@ void cpu_init(void)
-       BUG_ON(curr->mm);
-       enter_lazy_tlb(&init_mm, curr);
- 
--      load_sp0(t, thread);
-+      load_sp0(thread->sp0);
-       set_tss_desc(cpu, t);
-       load_TR_desc();
-       load_mm_ldt(&init_mm);
-diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
-index 2e42b66b8ca4..48a3f240f565 100644
---- a/arch/x86/kernel/process_32.c
-+++ b/arch/x86/kernel/process_32.c
-@@ -287,7 +287,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
-        * current_thread_info().  Refresh the SYSENTER configuration in
-        * case prev or next is vm86.
-        */
--      load_sp0(tss, next);
-+      load_sp0(next->sp0);
-       refresh_sysenter_cs(next);
-       this_cpu_write(cpu_current_top_of_stack,
-                      (unsigned long)task_stack_page(next_p) +
-diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
-index 565daaa6f18d..37b933628a8b 100644
---- a/arch/x86/kernel/process_64.c
-+++ b/arch/x86/kernel/process_64.c
-@@ -466,7 +466,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
-       this_cpu_write(current_task, next_p);
- 
-       /* Reload sp0. */
--      load_sp0(tss, next);
-+      load_sp0(next->sp0);
- 
-       /*
-        * Now maybe reload the debug registers and handle I/O bitmaps
-diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
-index 5bc1c3ab6287..0f1d92cd20ad 100644
---- a/arch/x86/kernel/vm86_32.c
-+++ b/arch/x86/kernel/vm86_32.c
-@@ -94,7 +94,6 @@
- 
- void save_v86_state(struct kernel_vm86_regs *regs, int retval)
- {
--      struct tss_struct *tss;
-       struct task_struct *tsk = current;
-       struct vm86plus_struct __user *user;
-       struct vm86 *vm86 = current->thread.vm86;
-@@ -146,13 +145,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
-               do_exit(SIGSEGV);
-       }
- 
--      tss = &per_cpu(cpu_tss, get_cpu());
-+      preempt_disable();
-       tsk->thread.sp0 = vm86->saved_sp0;
-       tsk->thread.sysenter_cs = __KERNEL_CS;
--      load_sp0(tss, &tsk->thread);
-+      load_sp0(tsk->thread.sp0);
-       refresh_sysenter_cs(&tsk->thread);
-       vm86->saved_sp0 = 0;
--      put_cpu();
-+      preempt_enable();
- 
-       memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
- 
-@@ -238,7 +237,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
- 
- static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
- {
--      struct tss_struct *tss;
-       struct task_struct *tsk = current;
-       struct vm86 *vm86 = tsk->thread.vm86;
-       struct kernel_vm86_regs vm86regs;
-@@ -366,8 +364,8 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
-       vm86->saved_sp0 = tsk->thread.sp0;
-       lazy_save_gs(vm86->regs32.gs);
- 
--      tss = &per_cpu(cpu_tss, get_cpu());
-       /* make room for real-mode segments */
-+      preempt_disable();
-       tsk->thread.sp0 += 16;
- 
-       if (static_cpu_has(X86_FEATURE_SEP)) {
-@@ -375,8 +373,8 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
-               refresh_sysenter_cs(&tsk->thread);
-       }
- 
--      load_sp0(tss, &tsk->thread);
--      put_cpu();
-+      load_sp0(tsk->thread.sp0);
-+      preempt_enable();
- 
-       if (vm86->flags & VM86_SCREEN_BITMAP)
-               mark_screen_rdonly(tsk->mm);
-diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
-index 8da4eff19c2a..e7b213047724 100644
---- a/arch/x86/xen/enlighten_pv.c
-+++ b/arch/x86/xen/enlighten_pv.c
-@@ -810,15 +810,14 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
-       }
- }
- 
--static void xen_load_sp0(struct tss_struct *tss,
--                       struct thread_struct *thread)
-+static void xen_load_sp0(unsigned long sp0)
- {
-       struct multicall_space mcs;
- 
-       mcs = xen_mc_entry(0);
--      MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
-+      MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
-       xen_mc_issue(PARAVIRT_LAZY_CPU);
--      tss->x86_tss.sp0 = thread->sp0;
-+      this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
- }
- 
- void xen_set_iopl_mask(unsigned mask)
--- 
-2.14.2
-
diff --git a/patches/kernel/0101-x86-entry-64-Pass-SP0-directly-to-load_sp0.patch b/patches/kernel/0101-x86-entry-64-Pass-SP0-directly-to-load_sp0.patch

new file mode 100644 (file)

index 0000000..4b94cba
--- /dev/null
+++ b/patches/kernel/0101-x86-entry-64-Pass-SP0-directly-to-load_sp0.patch
@@ -0,0 +1,238 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:10 -0700
+Subject: [PATCH] x86/entry/64: Pass SP0 directly to load_sp0()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+load_sp0() had an odd signature:
+
+  void load_sp0(struct tss_struct *tss, struct thread_struct *thread);
+
+Simplify it to:
+
+  void load_sp0(unsigned long sp0);
+
+Also simplify a few get_cpu()/put_cpu() sequences to
+preempt_disable()/preempt_enable().
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/2655d8b42ed940aa384fe18ee1129bbbcf730a08.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit da51da189a24bb9b7e2d5a123be096e51a4695a5)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 41f6a89b0be4d052a6af59df5e56102d4e4c79ef)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/paravirt.h       |  5 ++---
+ arch/x86/include/asm/paravirt_types.h |  2 +-
+ arch/x86/include/asm/processor.h      |  9 ++++-----
+ arch/x86/kernel/cpu/common.c          |  4 ++--
+ arch/x86/kernel/process_32.c          |  2 +-
+ arch/x86/kernel/process_64.c          |  2 +-
+ arch/x86/kernel/vm86_32.c             | 14 ++++++--------
+ arch/x86/xen/enlighten_pv.c           |  7 +++----
+ 8 files changed, 20 insertions(+), 25 deletions(-)
+
+diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
+index 12deec722cf0..43d4f90edebc 100644
+--- a/arch/x86/include/asm/paravirt.h
++++ b/arch/x86/include/asm/paravirt.h
+@@ -15,10 +15,9 @@
+ #include <linux/cpumask.h>
+ #include <asm/frame.h>
+ 
+-static inline void load_sp0(struct tss_struct *tss,
+-                           struct thread_struct *thread)
++static inline void load_sp0(unsigned long sp0)
+ {
+-      PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
++      PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
+ }
+ 
+ /* The paravirtualized CPUID instruction. */
+diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
+index 42873edd9f9d..e3953a1e2b57 100644
+--- a/arch/x86/include/asm/paravirt_types.h
++++ b/arch/x86/include/asm/paravirt_types.h
+@@ -133,7 +133,7 @@ struct pv_cpu_ops {
+       void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
+       void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
+ 
+-      void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
++      void (*load_sp0)(unsigned long sp0);
+ 
+       void (*set_iopl_mask)(unsigned mask);
+ 
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index ee37fb86900a..85ddfc1a9bb5 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -510,9 +510,9 @@ static inline void native_set_iopl_mask(unsigned mask)
+ }
+ 
+ static inline void
+-native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
++native_load_sp0(unsigned long sp0)
+ {
+-      tss->x86_tss.sp0 = thread->sp0;
++      this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+ }
+ 
+ static inline void native_swapgs(void)
+@@ -537,10 +537,9 @@ static inline unsigned long current_top_of_stack(void)
+ #else
+ #define __cpuid                       native_cpuid
+ 
+-static inline void load_sp0(struct tss_struct *tss,
+-                          struct thread_struct *thread)
++static inline void load_sp0(unsigned long sp0)
+ {
+-      native_load_sp0(tss, thread);
++      native_load_sp0(sp0);
+ }
+ 
+ #define set_iopl_mask native_set_iopl_mask
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index ef7b1ba56363..6562acbfc4e0 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1570,7 +1570,7 @@ void cpu_init(void)
+       BUG_ON(me->mm);
+       enter_lazy_tlb(&init_mm, me);
+ 
+-      load_sp0(t, &current->thread);
++      load_sp0(current->thread.sp0);
+       set_tss_desc(cpu, t);
+       load_TR_desc();
+       load_mm_ldt(&init_mm);
+@@ -1624,7 +1624,7 @@ void cpu_init(void)
+       BUG_ON(curr->mm);
+       enter_lazy_tlb(&init_mm, curr);
+ 
+-      load_sp0(t, thread);
++      load_sp0(thread->sp0);
+       set_tss_desc(cpu, t);
+       load_TR_desc();
+       load_mm_ldt(&init_mm);
+diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
+index 2e42b66b8ca4..48a3f240f565 100644
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -287,7 +287,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+        * current_thread_info().  Refresh the SYSENTER configuration in
+        * case prev or next is vm86.
+        */
+-      load_sp0(tss, next);
++      load_sp0(next->sp0);
+       refresh_sysenter_cs(next);
+       this_cpu_write(cpu_current_top_of_stack,
+                      (unsigned long)task_stack_page(next_p) +
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index 565daaa6f18d..37b933628a8b 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -466,7 +466,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+       this_cpu_write(current_task, next_p);
+ 
+       /* Reload sp0. */
+-      load_sp0(tss, next);
++      load_sp0(next->sp0);
+ 
+       /*
+        * Now maybe reload the debug registers and handle I/O bitmaps
+diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
+index 5bc1c3ab6287..0f1d92cd20ad 100644
+--- a/arch/x86/kernel/vm86_32.c
++++ b/arch/x86/kernel/vm86_32.c
+@@ -94,7 +94,6 @@
+ 
+ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
+ {
+-      struct tss_struct *tss;
+       struct task_struct *tsk = current;
+       struct vm86plus_struct __user *user;
+       struct vm86 *vm86 = current->thread.vm86;
+@@ -146,13 +145,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
+               do_exit(SIGSEGV);
+       }
+ 
+-      tss = &per_cpu(cpu_tss, get_cpu());
++      preempt_disable();
+       tsk->thread.sp0 = vm86->saved_sp0;
+       tsk->thread.sysenter_cs = __KERNEL_CS;
+-      load_sp0(tss, &tsk->thread);
++      load_sp0(tsk->thread.sp0);
+       refresh_sysenter_cs(&tsk->thread);
+       vm86->saved_sp0 = 0;
+-      put_cpu();
++      preempt_enable();
+ 
+       memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
+ 
+@@ -238,7 +237,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
+ 
+ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
+ {
+-      struct tss_struct *tss;
+       struct task_struct *tsk = current;
+       struct vm86 *vm86 = tsk->thread.vm86;
+       struct kernel_vm86_regs vm86regs;
+@@ -366,8 +364,8 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
+       vm86->saved_sp0 = tsk->thread.sp0;
+       lazy_save_gs(vm86->regs32.gs);
+ 
+-      tss = &per_cpu(cpu_tss, get_cpu());
+       /* make room for real-mode segments */
++      preempt_disable();
+       tsk->thread.sp0 += 16;
+ 
+       if (static_cpu_has(X86_FEATURE_SEP)) {
+@@ -375,8 +373,8 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
+               refresh_sysenter_cs(&tsk->thread);
+       }
+ 
+-      load_sp0(tss, &tsk->thread);
+-      put_cpu();
++      load_sp0(tsk->thread.sp0);
++      preempt_enable();
+ 
+       if (vm86->flags & VM86_SCREEN_BITMAP)
+               mark_screen_rdonly(tsk->mm);
+diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
+index 8da4eff19c2a..e7b213047724 100644
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -810,15 +810,14 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
+       }
+ }
+ 
+-static void xen_load_sp0(struct tss_struct *tss,
+-                       struct thread_struct *thread)
++static void xen_load_sp0(unsigned long sp0)
+ {
+       struct multicall_space mcs;
+ 
+       mcs = xen_mc_entry(0);
+-      MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
++      MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
+       xen_mc_issue(PARAVIRT_LAZY_CPU);
+-      tss->x86_tss.sp0 = thread->sp0;
++      this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+ }
+ 
+ void xen_set_iopl_mask(unsigned mask)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0101-x86-entry-Add-task_top_of_stack-to-find-the-top-of-a.patch b/patches/kernel/0101-x86-entry-Add-task_top_of_stack-to-find-the-top-of-a.patch

deleted file mode 100644 (file)

index 1708df2..0000000
--- a/patches/kernel/0101-x86-entry-Add-task_top_of_stack-to-find-the-top-of-a.patch
+++ /dev/null
@@ -1,48 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:11 -0700
-Subject: [PATCH] x86/entry: Add task_top_of_stack() to find the top of a
- task's stack
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This will let us get rid of a few places that hardcode accesses to
-thread.sp0.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/b49b3f95a8ff858c40c9b0f5b32be0355324327d.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 3500130b84a3cdc5b6796eba1daf178944935efe)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit f1078e10e361afaeb22ee72c54d5ad397e19728d)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/processor.h | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index 85ddfc1a9bb5..f83fbf1b6dd9 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -788,6 +788,8 @@ static inline void spin_lock_prefetch(const void *x)
- #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
-                          TOP_OF_KERNEL_STACK_PADDING)
- 
-+#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
-+
- #ifdef CONFIG_X86_32
- /*
-  * User space process size: 3GB (default).
--- 
-2.14.2
-
diff --git a/patches/kernel/0102-x86-entry-Add-task_top_of_stack-to-find-the-top-of-a.patch b/patches/kernel/0102-x86-entry-Add-task_top_of_stack-to-find-the-top-of-a.patch

new file mode 100644 (file)

index 0000000..1708df2
--- /dev/null
+++ b/patches/kernel/0102-x86-entry-Add-task_top_of_stack-to-find-the-top-of-a.patch
@@ -0,0 +1,48 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:11 -0700
+Subject: [PATCH] x86/entry: Add task_top_of_stack() to find the top of a
+ task's stack
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This will let us get rid of a few places that hardcode accesses to
+thread.sp0.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/b49b3f95a8ff858c40c9b0f5b32be0355324327d.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 3500130b84a3cdc5b6796eba1daf178944935efe)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit f1078e10e361afaeb22ee72c54d5ad397e19728d)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/processor.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 85ddfc1a9bb5..f83fbf1b6dd9 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -788,6 +788,8 @@ static inline void spin_lock_prefetch(const void *x)
+ #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
+                          TOP_OF_KERNEL_STACK_PADDING)
+ 
++#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
++
+ #ifdef CONFIG_X86_32
+ /*
+  * User space process size: 3GB (default).
+-- 
+2.14.2
+
diff --git a/patches/kernel/0102-x86-xen-64-x86-entry-64-Clean-up-SP-code-in-cpu_init.patch b/patches/kernel/0102-x86-xen-64-x86-entry-64-Clean-up-SP-code-in-cpu_init.patch

deleted file mode 100644 (file)

index 5c37994..0000000
--- a/patches/kernel/0102-x86-xen-64-x86-entry-64-Clean-up-SP-code-in-cpu_init.patch
+++ /dev/null
@@ -1,99 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:12 -0700
-Subject: [PATCH] x86/xen/64, x86/entry/64: Clean up SP code in
- cpu_initialize_context()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-I'm removing thread_struct::sp0, and Xen's usage of it is slightly
-dubious and unnecessary.  Use appropriate helpers instead.
-
-While we're at it, reorder the code slightly to make it more obvious
-what's going on.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Juergen Gross <jgross@suse.com>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/d5b9a3da2b47c68325bd2bbe8f82d9554dee0d0f.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit f16b3da1dc936c0f8121741d0a1731bf242f2f56)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 27c60a1f6c49062151f67042458a523386cc3dc5)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/xen/smp_pv.c | 17 ++++++++++++++---
- 1 file changed, 14 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
-index 51471408fdd1..8c0e047d0b80 100644
---- a/arch/x86/xen/smp_pv.c
-+++ b/arch/x86/xen/smp_pv.c
-@@ -13,6 +13,7 @@
-  * single-threaded.
-  */
- #include <linux/sched.h>
-+#include <linux/sched/task_stack.h>
- #include <linux/err.h>
- #include <linux/slab.h>
- #include <linux/smp.h>
-@@ -293,12 +294,19 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
- #endif
-       memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
- 
-+      /*
-+       * Bring up the CPU in cpu_bringup_and_idle() with the stack
-+       * pointing just below where pt_regs would be if it were a normal
-+       * kernel entry.
-+       */
-       ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
-       ctxt->flags = VGCF_IN_KERNEL;
-       ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
-       ctxt->user_regs.ds = __USER_DS;
-       ctxt->user_regs.es = __USER_DS;
-       ctxt->user_regs.ss = __KERNEL_DS;
-+      ctxt->user_regs.cs = __KERNEL_CS;
-+      ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);
- 
-       xen_copy_trap_info(ctxt->trap_ctxt);
- 
-@@ -313,8 +321,13 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
-       ctxt->gdt_frames[0] = gdt_mfn;
-       ctxt->gdt_ents      = GDT_ENTRIES;
- 
-+      /*
-+       * Set SS:SP that Xen will use when entering guest kernel mode
-+       * from guest user mode.  Subsequent calls to load_sp0() can
-+       * change this value.
-+       */
-       ctxt->kernel_ss = __KERNEL_DS;
--      ctxt->kernel_sp = idle->thread.sp0;
-+      ctxt->kernel_sp = task_top_of_stack(idle);
- 
- #ifdef CONFIG_X86_32
-       ctxt->event_callback_cs     = __KERNEL_CS;
-@@ -326,10 +339,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
-               (unsigned long)xen_hypervisor_callback;
-       ctxt->failsafe_callback_eip =
-               (unsigned long)xen_failsafe_callback;
--      ctxt->user_regs.cs = __KERNEL_CS;
-       per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
- 
--      ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
-       ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
-       if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
-               BUG();
--- 
-2.14.2
-
diff --git a/patches/kernel/0103-x86-entry-64-Stop-initializing-TSS.sp0-at-boot.patch b/patches/kernel/0103-x86-entry-64-Stop-initializing-TSS.sp0-at-boot.patch

deleted file mode 100644 (file)

index acf1fd2..0000000
--- a/patches/kernel/0103-x86-entry-64-Stop-initializing-TSS.sp0-at-boot.patch
+++ /dev/null
@@ -1,102 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:13 -0700
-Subject: [PATCH] x86/entry/64: Stop initializing TSS.sp0 at boot
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-In my quest to get rid of thread_struct::sp0, I want to clean up or
-remove all of its readers.  Two of them are in cpu_init() (32-bit and
-64-bit), and they aren't needed.  This is because we never enter
-userspace at all on the threads that CPUs are initialized in.
-
-Poison the initial TSS.sp0 and stop initializing it on CPU init.
-
-The comment text mostly comes from Dave Hansen.  Thanks!
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/ee4a00540ad28c6cff475fbcc7769a4460acc861.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 20bb83443ea79087b5e5f8dab4e9d80bb9bf7acb)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 8c6b12e88bd87433087ea1f1cd5a9a4975e4623c)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/cpu/common.c | 13 ++++++++++---
- arch/x86/kernel/process.c    |  8 +++++++-
- 2 files changed, 17 insertions(+), 4 deletions(-)
-
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 6562acbfc4e0..121fe3570d6f 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -1570,9 +1570,13 @@ void cpu_init(void)
-       BUG_ON(me->mm);
-       enter_lazy_tlb(&init_mm, me);
- 
--      load_sp0(current->thread.sp0);
-+      /*
-+       * Initialize the TSS.  Don't bother initializing sp0, as the initial
-+       * task never enters user mode.
-+       */
-       set_tss_desc(cpu, t);
-       load_TR_desc();
-+
-       load_mm_ldt(&init_mm);
- 
-       clear_all_debug_regs();
-@@ -1594,7 +1598,6 @@ void cpu_init(void)
-       int cpu = smp_processor_id();
-       struct task_struct *curr = current;
-       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
--      struct thread_struct *thread = &curr->thread;
- 
-       wait_for_master_cpu(cpu);
- 
-@@ -1624,9 +1627,13 @@ void cpu_init(void)
-       BUG_ON(curr->mm);
-       enter_lazy_tlb(&init_mm, curr);
- 
--      load_sp0(thread->sp0);
-+      /*
-+       * Initialize the TSS.  Don't bother initializing sp0, as the initial
-+       * task never enters user mode.
-+       */
-       set_tss_desc(cpu, t);
-       load_TR_desc();
-+
-       load_mm_ldt(&init_mm);
- 
-       t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
-diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
-index 3ca198080ea9..ccf3a4f4ef68 100644
---- a/arch/x86/kernel/process.c
-+++ b/arch/x86/kernel/process.c
-@@ -48,7 +48,13 @@
-  */
- __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
-       .x86_tss = {
--              .sp0 = TOP_OF_INIT_STACK,
-+              /*
-+               * .sp0 is only used when entering ring 0 from a lower
-+               * privilege level.  Since the init task never runs anything
-+               * but ring 0 code, there is no need for a valid value here.
-+               * Poison it.
-+               */
-+              .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
- #ifdef CONFIG_X86_32
-               .ss0 = __KERNEL_DS,
-               .ss1 = __KERNEL_CS,
--- 
-2.14.2
-
diff --git a/patches/kernel/0103-x86-xen-64-x86-entry-64-Clean-up-SP-code-in-cpu_init.patch b/patches/kernel/0103-x86-xen-64-x86-entry-64-Clean-up-SP-code-in-cpu_init.patch

new file mode 100644 (file)

index 0000000..5c37994
--- /dev/null
+++ b/patches/kernel/0103-x86-xen-64-x86-entry-64-Clean-up-SP-code-in-cpu_init.patch
@@ -0,0 +1,99 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:12 -0700
+Subject: [PATCH] x86/xen/64, x86/entry/64: Clean up SP code in
+ cpu_initialize_context()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+I'm removing thread_struct::sp0, and Xen's usage of it is slightly
+dubious and unnecessary.  Use appropriate helpers instead.
+
+While we're at it, reorder the code slightly to make it more obvious
+what's going on.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/d5b9a3da2b47c68325bd2bbe8f82d9554dee0d0f.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit f16b3da1dc936c0f8121741d0a1731bf242f2f56)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 27c60a1f6c49062151f67042458a523386cc3dc5)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/xen/smp_pv.c | 17 ++++++++++++++---
+ 1 file changed, 14 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
+index 51471408fdd1..8c0e047d0b80 100644
+--- a/arch/x86/xen/smp_pv.c
++++ b/arch/x86/xen/smp_pv.c
+@@ -13,6 +13,7 @@
+  * single-threaded.
+  */
+ #include <linux/sched.h>
++#include <linux/sched/task_stack.h>
+ #include <linux/err.h>
+ #include <linux/slab.h>
+ #include <linux/smp.h>
+@@ -293,12 +294,19 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
+ #endif
+       memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
+ 
++      /*
++       * Bring up the CPU in cpu_bringup_and_idle() with the stack
++       * pointing just below where pt_regs would be if it were a normal
++       * kernel entry.
++       */
+       ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
+       ctxt->flags = VGCF_IN_KERNEL;
+       ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
+       ctxt->user_regs.ds = __USER_DS;
+       ctxt->user_regs.es = __USER_DS;
+       ctxt->user_regs.ss = __KERNEL_DS;
++      ctxt->user_regs.cs = __KERNEL_CS;
++      ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);
+ 
+       xen_copy_trap_info(ctxt->trap_ctxt);
+ 
+@@ -313,8 +321,13 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
+       ctxt->gdt_frames[0] = gdt_mfn;
+       ctxt->gdt_ents      = GDT_ENTRIES;
+ 
++      /*
++       * Set SS:SP that Xen will use when entering guest kernel mode
++       * from guest user mode.  Subsequent calls to load_sp0() can
++       * change this value.
++       */
+       ctxt->kernel_ss = __KERNEL_DS;
+-      ctxt->kernel_sp = idle->thread.sp0;
++      ctxt->kernel_sp = task_top_of_stack(idle);
+ 
+ #ifdef CONFIG_X86_32
+       ctxt->event_callback_cs     = __KERNEL_CS;
+@@ -326,10 +339,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
+               (unsigned long)xen_hypervisor_callback;
+       ctxt->failsafe_callback_eip =
+               (unsigned long)xen_failsafe_callback;
+-      ctxt->user_regs.cs = __KERNEL_CS;
+       per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
+ 
+-      ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
+       ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
+       if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
+               BUG();
+-- 
+2.14.2
+
diff --git a/patches/kernel/0104-x86-entry-64-Remove-all-remaining-direct-thread_stru.patch b/patches/kernel/0104-x86-entry-64-Remove-all-remaining-direct-thread_stru.patch

deleted file mode 100644 (file)

index a9687e1..0000000
--- a/patches/kernel/0104-x86-entry-64-Remove-all-remaining-direct-thread_stru.patch
+++ /dev/null
@@ -1,103 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:14 -0700
-Subject: [PATCH] x86/entry/64: Remove all remaining direct thread_struct::sp0
- reads
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The only remaining readers are in context switch code or vm86(), and
-they all just want to update TSS.sp0 to match the current task.
-Replace them all with a new helper update_sp0().
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/2d231687f4ff288c9d9e98d7861b7df374246ac3.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 46f5a10a721ce8dce8cc8fe55279b49e1c6b3288)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit cc87284caa7d31d9d5a55c418eb5278cab6e2db1)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/switch_to.h | 6 ++++++
- arch/x86/kernel/process_32.c     | 2 +-
- arch/x86/kernel/process_64.c     | 2 +-
- arch/x86/kernel/vm86_32.c        | 4 ++--
- 4 files changed, 10 insertions(+), 4 deletions(-)
-
-diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
-index 7ae8caffbada..54e64d909725 100644
---- a/arch/x86/include/asm/switch_to.h
-+++ b/arch/x86/include/asm/switch_to.h
-@@ -84,4 +84,10 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
- }
- #endif
- 
-+/* This is used when switching tasks or entering/exiting vm86 mode. */
-+static inline void update_sp0(struct task_struct *task)
-+{
-+      load_sp0(task->thread.sp0);
-+}
-+
- #endif /* _ASM_X86_SWITCH_TO_H */
-diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
-index 48a3f240f565..c0d60420466c 100644
---- a/arch/x86/kernel/process_32.c
-+++ b/arch/x86/kernel/process_32.c
-@@ -287,7 +287,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
-        * current_thread_info().  Refresh the SYSENTER configuration in
-        * case prev or next is vm86.
-        */
--      load_sp0(next->sp0);
-+      update_sp0(next_p);
-       refresh_sysenter_cs(next);
-       this_cpu_write(cpu_current_top_of_stack,
-                      (unsigned long)task_stack_page(next_p) +
-diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
-index 37b933628a8b..8a748e17bf6e 100644
---- a/arch/x86/kernel/process_64.c
-+++ b/arch/x86/kernel/process_64.c
-@@ -466,7 +466,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
-       this_cpu_write(current_task, next_p);
- 
-       /* Reload sp0. */
--      load_sp0(next->sp0);
-+      update_sp0(next_p);
- 
-       /*
-        * Now maybe reload the debug registers and handle I/O bitmaps
-diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
-index 0f1d92cd20ad..a7b44c75c642 100644
---- a/arch/x86/kernel/vm86_32.c
-+++ b/arch/x86/kernel/vm86_32.c
-@@ -148,7 +148,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
-       preempt_disable();
-       tsk->thread.sp0 = vm86->saved_sp0;
-       tsk->thread.sysenter_cs = __KERNEL_CS;
--      load_sp0(tsk->thread.sp0);
-+      update_sp0(tsk);
-       refresh_sysenter_cs(&tsk->thread);
-       vm86->saved_sp0 = 0;
-       preempt_enable();
-@@ -373,7 +373,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
-               refresh_sysenter_cs(&tsk->thread);
-       }
- 
--      load_sp0(tsk->thread.sp0);
-+      update_sp0(tsk);
-       preempt_enable();
- 
-       if (vm86->flags & VM86_SCREEN_BITMAP)
--- 
-2.14.2
-
diff --git a/patches/kernel/0104-x86-entry-64-Stop-initializing-TSS.sp0-at-boot.patch b/patches/kernel/0104-x86-entry-64-Stop-initializing-TSS.sp0-at-boot.patch

new file mode 100644 (file)

index 0000000..acf1fd2
--- /dev/null
+++ b/patches/kernel/0104-x86-entry-64-Stop-initializing-TSS.sp0-at-boot.patch
@@ -0,0 +1,102 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:13 -0700
+Subject: [PATCH] x86/entry/64: Stop initializing TSS.sp0 at boot
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+In my quest to get rid of thread_struct::sp0, I want to clean up or
+remove all of its readers.  Two of them are in cpu_init() (32-bit and
+64-bit), and they aren't needed.  This is because we never enter
+userspace at all on the threads that CPUs are initialized in.
+
+Poison the initial TSS.sp0 and stop initializing it on CPU init.
+
+The comment text mostly comes from Dave Hansen.  Thanks!
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/ee4a00540ad28c6cff475fbcc7769a4460acc861.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 20bb83443ea79087b5e5f8dab4e9d80bb9bf7acb)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 8c6b12e88bd87433087ea1f1cd5a9a4975e4623c)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/cpu/common.c | 13 ++++++++++---
+ arch/x86/kernel/process.c    |  8 +++++++-
+ 2 files changed, 17 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 6562acbfc4e0..121fe3570d6f 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1570,9 +1570,13 @@ void cpu_init(void)
+       BUG_ON(me->mm);
+       enter_lazy_tlb(&init_mm, me);
+ 
+-      load_sp0(current->thread.sp0);
++      /*
++       * Initialize the TSS.  Don't bother initializing sp0, as the initial
++       * task never enters user mode.
++       */
+       set_tss_desc(cpu, t);
+       load_TR_desc();
++
+       load_mm_ldt(&init_mm);
+ 
+       clear_all_debug_regs();
+@@ -1594,7 +1598,6 @@ void cpu_init(void)
+       int cpu = smp_processor_id();
+       struct task_struct *curr = current;
+       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
+-      struct thread_struct *thread = &curr->thread;
+ 
+       wait_for_master_cpu(cpu);
+ 
+@@ -1624,9 +1627,13 @@ void cpu_init(void)
+       BUG_ON(curr->mm);
+       enter_lazy_tlb(&init_mm, curr);
+ 
+-      load_sp0(thread->sp0);
++      /*
++       * Initialize the TSS.  Don't bother initializing sp0, as the initial
++       * task never enters user mode.
++       */
+       set_tss_desc(cpu, t);
+       load_TR_desc();
++
+       load_mm_ldt(&init_mm);
+ 
+       t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
+index 3ca198080ea9..ccf3a4f4ef68 100644
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -48,7 +48,13 @@
+  */
+ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+       .x86_tss = {
+-              .sp0 = TOP_OF_INIT_STACK,
++              /*
++               * .sp0 is only used when entering ring 0 from a lower
++               * privilege level.  Since the init task never runs anything
++               * but ring 0 code, there is no need for a valid value here.
++               * Poison it.
++               */
++              .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
+ #ifdef CONFIG_X86_32
+               .ss0 = __KERNEL_DS,
+               .ss1 = __KERNEL_CS,
+-- 
+2.14.2
+
diff --git a/patches/kernel/0105-x86-entry-32-Fix-cpu_current_top_of_stack-initializa.patch b/patches/kernel/0105-x86-entry-32-Fix-cpu_current_top_of_stack-initializa.patch

deleted file mode 100644 (file)

index 6e49d46..0000000
--- a/patches/kernel/0105-x86-entry-32-Fix-cpu_current_top_of_stack-initializa.patch
+++ /dev/null
@@ -1,51 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:15 -0700
-Subject: [PATCH] x86/entry/32: Fix cpu_current_top_of_stack initialization at
- boot
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-cpu_current_top_of_stack's initialization forgot about
-TOP_OF_KERNEL_STACK_PADDING.  This bug didn't matter because the
-idle threads never enter user mode.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/e5e370a7e6e4fddd1c4e4cf619765d96bb874b21.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit cd493a6deb8b78eca280d05f7fa73fd69403ae29)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 258c98e7d4b8f1459772e656cd736c028a13add9)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/smpboot.c | 3 +--
- 1 file changed, 1 insertion(+), 2 deletions(-)
-
-diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
-index d05006f6c31c..8ea3b18cbdc1 100644
---- a/arch/x86/kernel/smpboot.c
-+++ b/arch/x86/kernel/smpboot.c
-@@ -961,8 +961,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
- #ifdef CONFIG_X86_32
-       /* Stack for startup_32 can be just as for start_secondary onwards */
-       irq_ctx_init(cpu);
--      per_cpu(cpu_current_top_of_stack, cpu) =
--              (unsigned long)task_stack_page(idle) + THREAD_SIZE;
-+      per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
- #else
-       initial_gs = per_cpu_offset(cpu);
- #endif
--- 
-2.14.2
-
diff --git a/patches/kernel/0105-x86-entry-64-Remove-all-remaining-direct-thread_stru.patch b/patches/kernel/0105-x86-entry-64-Remove-all-remaining-direct-thread_stru.patch

new file mode 100644 (file)

index 0000000..a9687e1
--- /dev/null
+++ b/patches/kernel/0105-x86-entry-64-Remove-all-remaining-direct-thread_stru.patch
@@ -0,0 +1,103 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:14 -0700
+Subject: [PATCH] x86/entry/64: Remove all remaining direct thread_struct::sp0
+ reads
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The only remaining readers are in context switch code or vm86(), and
+they all just want to update TSS.sp0 to match the current task.
+Replace them all with a new helper update_sp0().
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/2d231687f4ff288c9d9e98d7861b7df374246ac3.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 46f5a10a721ce8dce8cc8fe55279b49e1c6b3288)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit cc87284caa7d31d9d5a55c418eb5278cab6e2db1)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/switch_to.h | 6 ++++++
+ arch/x86/kernel/process_32.c     | 2 +-
+ arch/x86/kernel/process_64.c     | 2 +-
+ arch/x86/kernel/vm86_32.c        | 4 ++--
+ 4 files changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
+index 7ae8caffbada..54e64d909725 100644
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -84,4 +84,10 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
+ }
+ #endif
+ 
++/* This is used when switching tasks or entering/exiting vm86 mode. */
++static inline void update_sp0(struct task_struct *task)
++{
++      load_sp0(task->thread.sp0);
++}
++
+ #endif /* _ASM_X86_SWITCH_TO_H */
+diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
+index 48a3f240f565..c0d60420466c 100644
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -287,7 +287,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+        * current_thread_info().  Refresh the SYSENTER configuration in
+        * case prev or next is vm86.
+        */
+-      load_sp0(next->sp0);
++      update_sp0(next_p);
+       refresh_sysenter_cs(next);
+       this_cpu_write(cpu_current_top_of_stack,
+                      (unsigned long)task_stack_page(next_p) +
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index 37b933628a8b..8a748e17bf6e 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -466,7 +466,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+       this_cpu_write(current_task, next_p);
+ 
+       /* Reload sp0. */
+-      load_sp0(next->sp0);
++      update_sp0(next_p);
+ 
+       /*
+        * Now maybe reload the debug registers and handle I/O bitmaps
+diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
+index 0f1d92cd20ad..a7b44c75c642 100644
+--- a/arch/x86/kernel/vm86_32.c
++++ b/arch/x86/kernel/vm86_32.c
+@@ -148,7 +148,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
+       preempt_disable();
+       tsk->thread.sp0 = vm86->saved_sp0;
+       tsk->thread.sysenter_cs = __KERNEL_CS;
+-      load_sp0(tsk->thread.sp0);
++      update_sp0(tsk);
+       refresh_sysenter_cs(&tsk->thread);
+       vm86->saved_sp0 = 0;
+       preempt_enable();
+@@ -373,7 +373,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
+               refresh_sysenter_cs(&tsk->thread);
+       }
+ 
+-      load_sp0(tsk->thread.sp0);
++      update_sp0(tsk);
+       preempt_enable();
+ 
+       if (vm86->flags & VM86_SCREEN_BITMAP)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0106-x86-entry-32-Fix-cpu_current_top_of_stack-initializa.patch b/patches/kernel/0106-x86-entry-32-Fix-cpu_current_top_of_stack-initializa.patch

new file mode 100644 (file)

index 0000000..6e49d46
--- /dev/null
+++ b/patches/kernel/0106-x86-entry-32-Fix-cpu_current_top_of_stack-initializa.patch
@@ -0,0 +1,51 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:15 -0700
+Subject: [PATCH] x86/entry/32: Fix cpu_current_top_of_stack initialization at
+ boot
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+cpu_current_top_of_stack's initialization forgot about
+TOP_OF_KERNEL_STACK_PADDING.  This bug didn't matter because the
+idle threads never enter user mode.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/e5e370a7e6e4fddd1c4e4cf619765d96bb874b21.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit cd493a6deb8b78eca280d05f7fa73fd69403ae29)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 258c98e7d4b8f1459772e656cd736c028a13add9)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/smpboot.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index d05006f6c31c..8ea3b18cbdc1 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -961,8 +961,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
+ #ifdef CONFIG_X86_32
+       /* Stack for startup_32 can be just as for start_secondary onwards */
+       irq_ctx_init(cpu);
+-      per_cpu(cpu_current_top_of_stack, cpu) =
+-              (unsigned long)task_stack_page(idle) + THREAD_SIZE;
++      per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
+ #else
+       initial_gs = per_cpu_offset(cpu);
+ #endif
+-- 
+2.14.2
+
diff --git a/patches/kernel/0106-x86-entry-64-Remove-thread_struct-sp0.patch b/patches/kernel/0106-x86-entry-64-Remove-thread_struct-sp0.patch

deleted file mode 100644 (file)

index 3f9fffb..0000000
--- a/patches/kernel/0106-x86-entry-64-Remove-thread_struct-sp0.patch
+++ /dev/null
@@ -1,154 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:16 -0700
-Subject: [PATCH] x86/entry/64: Remove thread_struct::sp0
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-On x86_64, we can easily calculate sp0 when needed instead of
-storing it in thread_struct.
-
-On x86_32, a similar cleanup would be possible, but it would require
-cleaning up the vm86 code first, and that can wait for a later
-cleanup series.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/719cd9c66c548c4350d98a90f050aee8b17f8919.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit d375cf1530595e33961a8844192cddab913650e3)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 4910af19c69a87e9432467f4d7cb78da5fbcc30a)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/compat.h    |  1 +
- arch/x86/include/asm/processor.h | 28 +++++++++-------------------
- arch/x86/include/asm/switch_to.h |  6 ++++++
- arch/x86/kernel/process_64.c     |  1 -
- 4 files changed, 16 insertions(+), 20 deletions(-)
-
-diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
-index 5343c19814b3..948b6d8ec46f 100644
---- a/arch/x86/include/asm/compat.h
-+++ b/arch/x86/include/asm/compat.h
-@@ -6,6 +6,7 @@
-  */
- #include <linux/types.h>
- #include <linux/sched.h>
-+#include <linux/sched/task_stack.h>
- #include <asm/processor.h>
- #include <asm/user32.h>
- #include <asm/unistd.h>
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index f83fbf1b6dd9..cec9a329c0f1 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -423,7 +423,9 @@ typedef struct {
- struct thread_struct {
-       /* Cached TLS descriptors: */
-       struct desc_struct      tls_array[GDT_ENTRY_TLS_ENTRIES];
-+#ifdef CONFIG_X86_32
-       unsigned long           sp0;
-+#endif
-       unsigned long           sp;
- #ifdef CONFIG_X86_32
-       unsigned long           sysenter_cs;
-@@ -790,6 +792,13 @@ static inline void spin_lock_prefetch(const void *x)
- 
- #define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
- 
-+#define task_pt_regs(task) \
-+({                                                                    \
-+      unsigned long __ptr = (unsigned long)task_stack_page(task);     \
-+      __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;             \
-+      ((struct pt_regs *)__ptr) - 1;                                  \
-+})
-+
- #ifdef CONFIG_X86_32
- /*
-  * User space process size: 3GB (default).
-@@ -807,23 +816,6 @@ static inline void spin_lock_prefetch(const void *x)
-       .addr_limit             = KERNEL_DS,                              \
- }
- 
--/*
-- * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
-- * This is necessary to guarantee that the entire "struct pt_regs"
-- * is accessible even if the CPU haven't stored the SS/ESP registers
-- * on the stack (interrupt gate does not save these registers
-- * when switching to the same priv ring).
-- * Therefore beware: accessing the ss/esp fields of the
-- * "struct pt_regs" is possible, but they may contain the
-- * completely wrong values.
-- */
--#define task_pt_regs(task) \
--({                                                                    \
--      unsigned long __ptr = (unsigned long)task_stack_page(task);     \
--      __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;             \
--      ((struct pt_regs *)__ptr) - 1;                                  \
--})
--
- #define KSTK_ESP(task)                (task_pt_regs(task)->sp)
- 
- #else
-@@ -853,11 +845,9 @@ static inline void spin_lock_prefetch(const void *x)
- #define STACK_TOP_MAX         TASK_SIZE_MAX
- 
- #define INIT_THREAD  {                                                \
--      .sp0                    = TOP_OF_INIT_STACK,            \
-       .addr_limit             = KERNEL_DS,                    \
- }
- 
--#define task_pt_regs(tsk)     ((struct pt_regs *)(tsk)->thread.sp0 - 1)
- extern unsigned long KSTK_ESP(struct task_struct *task);
- 
- #endif /* CONFIG_X86_64 */
-diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
-index 54e64d909725..010cd6e4eafc 100644
---- a/arch/x86/include/asm/switch_to.h
-+++ b/arch/x86/include/asm/switch_to.h
-@@ -1,6 +1,8 @@
- #ifndef _ASM_X86_SWITCH_TO_H
- #define _ASM_X86_SWITCH_TO_H
- 
-+#include <linux/sched/task_stack.h>
-+
- struct task_struct; /* one of the stranger aspects of C forward declarations */
- 
- struct task_struct *__switch_to_asm(struct task_struct *prev,
-@@ -87,7 +89,11 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
- /* This is used when switching tasks or entering/exiting vm86 mode. */
- static inline void update_sp0(struct task_struct *task)
- {
-+#ifdef CONFIG_X86_32
-       load_sp0(task->thread.sp0);
-+#else
-+      load_sp0(task_top_of_stack(task));
-+#endif
- }
- 
- #endif /* _ASM_X86_SWITCH_TO_H */
-diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
-index 8a748e17bf6e..b08b9b6c40eb 100644
---- a/arch/x86/kernel/process_64.c
-+++ b/arch/x86/kernel/process_64.c
-@@ -275,7 +275,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
-       struct inactive_task_frame *frame;
-       struct task_struct *me = current;
- 
--      p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
-       childregs = task_pt_regs(p);
-       fork_frame = container_of(childregs, struct fork_frame, regs);
-       frame = &fork_frame->frame;
--- 
-2.14.2
-
diff --git a/patches/kernel/0107-x86-entry-64-Remove-thread_struct-sp0.patch b/patches/kernel/0107-x86-entry-64-Remove-thread_struct-sp0.patch

new file mode 100644 (file)

index 0000000..3f9fffb
--- /dev/null
+++ b/patches/kernel/0107-x86-entry-64-Remove-thread_struct-sp0.patch
@@ -0,0 +1,154 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:16 -0700
+Subject: [PATCH] x86/entry/64: Remove thread_struct::sp0
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+On x86_64, we can easily calculate sp0 when needed instead of
+storing it in thread_struct.
+
+On x86_32, a similar cleanup would be possible, but it would require
+cleaning up the vm86 code first, and that can wait for a later
+cleanup series.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/719cd9c66c548c4350d98a90f050aee8b17f8919.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit d375cf1530595e33961a8844192cddab913650e3)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 4910af19c69a87e9432467f4d7cb78da5fbcc30a)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/compat.h    |  1 +
+ arch/x86/include/asm/processor.h | 28 +++++++++-------------------
+ arch/x86/include/asm/switch_to.h |  6 ++++++
+ arch/x86/kernel/process_64.c     |  1 -
+ 4 files changed, 16 insertions(+), 20 deletions(-)
+
+diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
+index 5343c19814b3..948b6d8ec46f 100644
+--- a/arch/x86/include/asm/compat.h
++++ b/arch/x86/include/asm/compat.h
+@@ -6,6 +6,7 @@
+  */
+ #include <linux/types.h>
+ #include <linux/sched.h>
++#include <linux/sched/task_stack.h>
+ #include <asm/processor.h>
+ #include <asm/user32.h>
+ #include <asm/unistd.h>
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index f83fbf1b6dd9..cec9a329c0f1 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -423,7 +423,9 @@ typedef struct {
+ struct thread_struct {
+       /* Cached TLS descriptors: */
+       struct desc_struct      tls_array[GDT_ENTRY_TLS_ENTRIES];
++#ifdef CONFIG_X86_32
+       unsigned long           sp0;
++#endif
+       unsigned long           sp;
+ #ifdef CONFIG_X86_32
+       unsigned long           sysenter_cs;
+@@ -790,6 +792,13 @@ static inline void spin_lock_prefetch(const void *x)
+ 
+ #define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
+ 
++#define task_pt_regs(task) \
++({                                                                    \
++      unsigned long __ptr = (unsigned long)task_stack_page(task);     \
++      __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;             \
++      ((struct pt_regs *)__ptr) - 1;                                  \
++})
++
+ #ifdef CONFIG_X86_32
+ /*
+  * User space process size: 3GB (default).
+@@ -807,23 +816,6 @@ static inline void spin_lock_prefetch(const void *x)
+       .addr_limit             = KERNEL_DS,                              \
+ }
+ 
+-/*
+- * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
+- * This is necessary to guarantee that the entire "struct pt_regs"
+- * is accessible even if the CPU haven't stored the SS/ESP registers
+- * on the stack (interrupt gate does not save these registers
+- * when switching to the same priv ring).
+- * Therefore beware: accessing the ss/esp fields of the
+- * "struct pt_regs" is possible, but they may contain the
+- * completely wrong values.
+- */
+-#define task_pt_regs(task) \
+-({                                                                    \
+-      unsigned long __ptr = (unsigned long)task_stack_page(task);     \
+-      __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;             \
+-      ((struct pt_regs *)__ptr) - 1;                                  \
+-})
+-
+ #define KSTK_ESP(task)                (task_pt_regs(task)->sp)
+ 
+ #else
+@@ -853,11 +845,9 @@ static inline void spin_lock_prefetch(const void *x)
+ #define STACK_TOP_MAX         TASK_SIZE_MAX
+ 
+ #define INIT_THREAD  {                                                \
+-      .sp0                    = TOP_OF_INIT_STACK,            \
+       .addr_limit             = KERNEL_DS,                    \
+ }
+ 
+-#define task_pt_regs(tsk)     ((struct pt_regs *)(tsk)->thread.sp0 - 1)
+ extern unsigned long KSTK_ESP(struct task_struct *task);
+ 
+ #endif /* CONFIG_X86_64 */
+diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
+index 54e64d909725..010cd6e4eafc 100644
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -1,6 +1,8 @@
+ #ifndef _ASM_X86_SWITCH_TO_H
+ #define _ASM_X86_SWITCH_TO_H
+ 
++#include <linux/sched/task_stack.h>
++
+ struct task_struct; /* one of the stranger aspects of C forward declarations */
+ 
+ struct task_struct *__switch_to_asm(struct task_struct *prev,
+@@ -87,7 +89,11 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
+ /* This is used when switching tasks or entering/exiting vm86 mode. */
+ static inline void update_sp0(struct task_struct *task)
+ {
++#ifdef CONFIG_X86_32
+       load_sp0(task->thread.sp0);
++#else
++      load_sp0(task_top_of_stack(task));
++#endif
+ }
+ 
+ #endif /* _ASM_X86_SWITCH_TO_H */
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index 8a748e17bf6e..b08b9b6c40eb 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -275,7 +275,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
+       struct inactive_task_frame *frame;
+       struct task_struct *me = current;
+ 
+-      p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+       childregs = task_pt_regs(p);
+       fork_frame = container_of(childregs, struct fork_frame, regs);
+       frame = &fork_frame->frame;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0107-x86-traps-Use-a-new-on_thread_stack-helper-to-clean-.patch b/patches/kernel/0107-x86-traps-Use-a-new-on_thread_stack-helper-to-clean-.patch

deleted file mode 100644 (file)

index 4535109..0000000
--- a/patches/kernel/0107-x86-traps-Use-a-new-on_thread_stack-helper-to-clean-.patch
+++ /dev/null
@@ -1,118 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 2 Nov 2017 00:59:17 -0700
-Subject: [PATCH] x86/traps: Use a new on_thread_stack() helper to clean up an
- assertion
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Let's keep the stack-related logic together rather than open-coding
-a comparison in an assertion in the traps code.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/856b15bee1f55017b8f79d3758b0d51c48a08cf8.1509609304.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit 3383642c2f9d4f5b4fa37436db4a109a1a10018c)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 243de7bd3434c50fb07dd0fc84c462236cfcba3e)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/processor.h   |  8 ++++++++
- arch/x86/include/asm/thread_info.h | 22 +++++++++++-----------
- arch/x86/kernel/traps.c            |  3 +--
- 3 files changed, 20 insertions(+), 13 deletions(-)
-
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index cec9a329c0f1..79739e5f939a 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -159,6 +159,8 @@ enum cpuid_regs_idx {
- extern struct cpuinfo_x86     boot_cpu_data;
- extern struct cpuinfo_x86     new_cpu_data;
- 
-+#include <linux/thread_info.h>
-+
- extern struct tss_struct      doublefault_tss;
- extern __u32                  cpu_caps_cleared[NCAPINTS];
- extern __u32                  cpu_caps_set[NCAPINTS];
-@@ -534,6 +536,12 @@ static inline unsigned long current_top_of_stack(void)
- #endif
- }
- 
-+static inline bool on_thread_stack(void)
-+{
-+      return (unsigned long)(current_top_of_stack() -
-+                             current_stack_pointer()) < THREAD_SIZE;
-+}
-+
- #ifdef CONFIG_PARAVIRT
- #include <asm/paravirt.h>
- #else
-diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
-index e00e1bd6e7b3..ec8ef3bbb7dc 100644
---- a/arch/x86/include/asm/thread_info.h
-+++ b/arch/x86/include/asm/thread_info.h
-@@ -48,6 +48,17 @@
-  * - this struct shares the supervisor stack pages
-  */
- #ifndef __ASSEMBLY__
-+static inline unsigned long current_stack_pointer(void)
-+{
-+      unsigned long sp;
-+#ifdef CONFIG_X86_64
-+      asm("mov %%rsp,%0" : "=g" (sp));
-+#else
-+      asm("mov %%esp,%0" : "=g" (sp));
-+#endif
-+      return sp;
-+}
-+
- struct task_struct;
- #include <asm/cpufeature.h>
- #include <linux/atomic.h>
-@@ -155,17 +166,6 @@ struct thread_info {
-  */
- #ifndef __ASSEMBLY__
- 
--static inline unsigned long current_stack_pointer(void)
--{
--      unsigned long sp;
--#ifdef CONFIG_X86_64
--      asm("mov %%rsp,%0" : "=g" (sp));
--#else
--      asm("mov %%esp,%0" : "=g" (sp));
--#endif
--      return sp;
--}
--
- /*
-  * Walks up the stack frames to make sure that the specified object is
-  * entirely contained by a single stack frame.
-diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
-index b2157d4a5338..3a46cab2696e 100644
---- a/arch/x86/kernel/traps.c
-+++ b/arch/x86/kernel/traps.c
-@@ -153,8 +153,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
-        * will catch asm bugs and any attempt to use ist_preempt_enable
-        * from double_fault.
-        */
--      BUG_ON((unsigned long)(current_top_of_stack() -
--                             current_stack_pointer()) >= THREAD_SIZE);
-+      BUG_ON(!on_thread_stack());
- 
-       preempt_enable_no_resched();
- }
--- 
-2.14.2
-
diff --git a/patches/kernel/0108-x86-entry-64-Shorten-TEST-instructions.patch b/patches/kernel/0108-x86-entry-64-Shorten-TEST-instructions.patch

deleted file mode 100644 (file)

index 59a5157..0000000
--- a/patches/kernel/0108-x86-entry-64-Shorten-TEST-instructions.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Borislav Petkov <bp@suse.de>
-Date: Thu, 2 Nov 2017 13:09:26 +0100
-Subject: [PATCH] x86/entry/64: Shorten TEST instructions
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Convert TESTL to TESTB and save 3 bytes per callsite.
-
-No functionality change.
-
-Signed-off-by: Borislav Petkov <bp@suse.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/20171102120926.4srwerqrr7g72e2k@pd.tnic
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 1e4c4f610f774df6088d7c065b2dd4d22adba698)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 2b5cfca36261d4ce45ebfdf2602d65201fa3c780)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 05501c781c20..2491b3b25b9a 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -620,7 +620,7 @@ GLOBAL(retint_user)
- GLOBAL(swapgs_restore_regs_and_return_to_usermode)
- #ifdef CONFIG_DEBUG_ENTRY
-       /* Assert that pt_regs indicates user mode. */
--      testl   $3, CS(%rsp)
-+      testb   $3, CS(%rsp)
-       jnz     1f
-       ud2
- 1:
-@@ -653,7 +653,7 @@ retint_kernel:
- GLOBAL(restore_regs_and_return_to_kernel)
- #ifdef CONFIG_DEBUG_ENTRY
-       /* Assert that pt_regs indicates kernel mode. */
--      testl   $3, CS(%rsp)
-+      testb   $3, CS(%rsp)
-       jz      1f
-       ud2
- 1:
--- 
-2.14.2
-
diff --git a/patches/kernel/0108-x86-traps-Use-a-new-on_thread_stack-helper-to-clean-.patch b/patches/kernel/0108-x86-traps-Use-a-new-on_thread_stack-helper-to-clean-.patch

new file mode 100644 (file)

index 0000000..4535109
--- /dev/null
+++ b/patches/kernel/0108-x86-traps-Use-a-new-on_thread_stack-helper-to-clean-.patch
@@ -0,0 +1,118 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 2 Nov 2017 00:59:17 -0700
+Subject: [PATCH] x86/traps: Use a new on_thread_stack() helper to clean up an
+ assertion
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Let's keep the stack-related logic together rather than open-coding
+a comparison in an assertion in the traps code.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/856b15bee1f55017b8f79d3758b0d51c48a08cf8.1509609304.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit 3383642c2f9d4f5b4fa37436db4a109a1a10018c)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 243de7bd3434c50fb07dd0fc84c462236cfcba3e)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/processor.h   |  8 ++++++++
+ arch/x86/include/asm/thread_info.h | 22 +++++++++++-----------
+ arch/x86/kernel/traps.c            |  3 +--
+ 3 files changed, 20 insertions(+), 13 deletions(-)
+
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index cec9a329c0f1..79739e5f939a 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -159,6 +159,8 @@ enum cpuid_regs_idx {
+ extern struct cpuinfo_x86     boot_cpu_data;
+ extern struct cpuinfo_x86     new_cpu_data;
+ 
++#include <linux/thread_info.h>
++
+ extern struct tss_struct      doublefault_tss;
+ extern __u32                  cpu_caps_cleared[NCAPINTS];
+ extern __u32                  cpu_caps_set[NCAPINTS];
+@@ -534,6 +536,12 @@ static inline unsigned long current_top_of_stack(void)
+ #endif
+ }
+ 
++static inline bool on_thread_stack(void)
++{
++      return (unsigned long)(current_top_of_stack() -
++                             current_stack_pointer()) < THREAD_SIZE;
++}
++
+ #ifdef CONFIG_PARAVIRT
+ #include <asm/paravirt.h>
+ #else
+diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
+index e00e1bd6e7b3..ec8ef3bbb7dc 100644
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -48,6 +48,17 @@
+  * - this struct shares the supervisor stack pages
+  */
+ #ifndef __ASSEMBLY__
++static inline unsigned long current_stack_pointer(void)
++{
++      unsigned long sp;
++#ifdef CONFIG_X86_64
++      asm("mov %%rsp,%0" : "=g" (sp));
++#else
++      asm("mov %%esp,%0" : "=g" (sp));
++#endif
++      return sp;
++}
++
+ struct task_struct;
+ #include <asm/cpufeature.h>
+ #include <linux/atomic.h>
+@@ -155,17 +166,6 @@ struct thread_info {
+  */
+ #ifndef __ASSEMBLY__
+ 
+-static inline unsigned long current_stack_pointer(void)
+-{
+-      unsigned long sp;
+-#ifdef CONFIG_X86_64
+-      asm("mov %%rsp,%0" : "=g" (sp));
+-#else
+-      asm("mov %%esp,%0" : "=g" (sp));
+-#endif
+-      return sp;
+-}
+-
+ /*
+  * Walks up the stack frames to make sure that the specified object is
+  * entirely contained by a single stack frame.
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index b2157d4a5338..3a46cab2696e 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -153,8 +153,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
+        * will catch asm bugs and any attempt to use ist_preempt_enable
+        * from double_fault.
+        */
+-      BUG_ON((unsigned long)(current_top_of_stack() -
+-                             current_stack_pointer()) >= THREAD_SIZE);
++      BUG_ON(!on_thread_stack());
+ 
+       preempt_enable_no_resched();
+ }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0109-x86-cpuid-Replace-set-clear_bit32.patch b/patches/kernel/0109-x86-cpuid-Replace-set-clear_bit32.patch

deleted file mode 100644 (file)

index 7e56665..0000000
--- a/patches/kernel/0109-x86-cpuid-Replace-set-clear_bit32.patch
+++ /dev/null
@@ -1,71 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Thu, 2 Nov 2017 13:22:35 +0100
-Subject: [PATCH] x86/cpuid: Replace set/clear_bit32()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Peter pointed out that the set/clear_bit32() variants are broken in various
-aspects.
-
-Replace them with open coded set/clear_bit() and type cast
-cpu_info::x86_capability as it's done in all other places throughout x86.
-
-Fixes: 0b00de857a64 ("x86/cpuid: Add generic table for CPUID dependencies")
-Reported-by: Peter Ziljstra <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andi Kleen <ak@linux.intel.com>
-(cherry picked from commit 06dd688ddda5819025e014b79aea9af6ab475fa2)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 3e511952bc3ff9b233d418b0a75a8331deb08171)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/cpu/cpuid-deps.c | 26 +++++++++++---------------
- 1 file changed, 11 insertions(+), 15 deletions(-)
-
-diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
-index c21f22d836ad..904b0a3c4e53 100644
---- a/arch/x86/kernel/cpu/cpuid-deps.c
-+++ b/arch/x86/kernel/cpu/cpuid-deps.c
-@@ -62,23 +62,19 @@ const static struct cpuid_dep cpuid_deps[] = {
-       {}
- };
- 
--static inline void __clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit)
--{
--      clear_bit32(bit, c->x86_capability);
--}
--
--static inline void __setup_clear_cpu_cap(unsigned int bit)
--{
--      clear_cpu_cap(&boot_cpu_data, bit);
--      set_bit32(bit, cpu_caps_cleared);
--}
--
- static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
- {
--      if (!c)
--              __setup_clear_cpu_cap(feature);
--      else
--              __clear_cpu_cap(c, feature);
-+      /*
-+       * Note: This could use the non atomic __*_bit() variants, but the
-+       * rest of the cpufeature code uses atomics as well, so keep it for
-+       * consistency. Cleanup all of it separately.
-+       */
-+      if (!c) {
-+              clear_cpu_cap(&boot_cpu_data, feature);
-+              set_bit(feature, (unsigned long *)cpu_caps_cleared);
-+      } else {
-+              clear_bit(feature, (unsigned long *)c->x86_capability);
-+      }
- }
- 
- /* Take the capabilities and the BUG bits into account */
--- 
-2.14.2
-
diff --git a/patches/kernel/0109-x86-entry-64-Shorten-TEST-instructions.patch b/patches/kernel/0109-x86-entry-64-Shorten-TEST-instructions.patch

new file mode 100644 (file)

index 0000000..59a5157
--- /dev/null
+++ b/patches/kernel/0109-x86-entry-64-Shorten-TEST-instructions.patch
@@ -0,0 +1,57 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Thu, 2 Nov 2017 13:09:26 +0100
+Subject: [PATCH] x86/entry/64: Shorten TEST instructions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Convert TESTL to TESTB and save 3 bytes per callsite.
+
+No functionality change.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20171102120926.4srwerqrr7g72e2k@pd.tnic
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 1e4c4f610f774df6088d7c065b2dd4d22adba698)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 2b5cfca36261d4ce45ebfdf2602d65201fa3c780)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 05501c781c20..2491b3b25b9a 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -620,7 +620,7 @@ GLOBAL(retint_user)
+ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+ #ifdef CONFIG_DEBUG_ENTRY
+       /* Assert that pt_regs indicates user mode. */
+-      testl   $3, CS(%rsp)
++      testb   $3, CS(%rsp)
+       jnz     1f
+       ud2
+ 1:
+@@ -653,7 +653,7 @@ retint_kernel:
+ GLOBAL(restore_regs_and_return_to_kernel)
+ #ifdef CONFIG_DEBUG_ENTRY
+       /* Assert that pt_regs indicates kernel mode. */
+-      testl   $3, CS(%rsp)
++      testb   $3, CS(%rsp)
+       jz      1f
+       ud2
+ 1:
+-- 
+2.14.2
+
diff --git a/patches/kernel/0110-bitops-Revert-cbe96375025e-bitops-Add-clear-set_bit3.patch b/patches/kernel/0110-bitops-Revert-cbe96375025e-bitops-Add-clear-set_bit3.patch

deleted file mode 100644 (file)

index e5d86e8..0000000
--- a/patches/kernel/0110-bitops-Revert-cbe96375025e-bitops-Add-clear-set_bit3.patch
+++ /dev/null
@@ -1,67 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Thu, 2 Nov 2017 13:30:03 +0100
-Subject: [PATCH] bitops: Revert cbe96375025e ("bitops: Add clear/set_bit32()
- to linux/bitops.h")
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-These ops are not endian safe and may break on architectures which have
-aligment requirements.
-
-Reverts: cbe96375025e ("bitops: Add clear/set_bit32() to linux/bitops.h")
-Reported-by: Peter Zijlstra <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andi Kleen <ak@linux.intel.com>
-(cherry picked from commit 1943dc07b45e347c52c1bfdd4a37e04a86e399aa)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit adb64d8c852206281ea6ee6590ae35076a219409)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- include/linux/bitops.h | 26 --------------------------
- 1 file changed, 26 deletions(-)
-
-diff --git a/include/linux/bitops.h b/include/linux/bitops.h
-index eb257a96db6d..a83c822c35c2 100644
---- a/include/linux/bitops.h
-+++ b/include/linux/bitops.h
-@@ -226,32 +226,6 @@ static inline unsigned long __ffs64(u64 word)
-       return __ffs((unsigned long)word);
- }
- 
--/*
-- * clear_bit32 - Clear a bit in memory for u32 array
-- * @nr: Bit to clear
-- * @addr: u32 * address of bitmap
-- *
-- * Same as clear_bit, but avoids needing casts for u32 arrays.
-- */
--
--static __always_inline void clear_bit32(long nr, volatile u32 *addr)
--{
--      clear_bit(nr, (volatile unsigned long *)addr);
--}
--
--/*
-- * set_bit32 - Set a bit in memory for u32 array
-- * @nr: Bit to clear
-- * @addr: u32 * address of bitmap
-- *
-- * Same as set_bit, but avoids needing casts for u32 arrays.
-- */
--
--static __always_inline void set_bit32(long nr, volatile u32 *addr)
--{
--      set_bit(nr, (volatile unsigned long *)addr);
--}
--
- #ifdef __KERNEL__
- 
- #ifndef set_mask_bits
--- 
-2.14.2
-
diff --git a/patches/kernel/0110-x86-cpuid-Replace-set-clear_bit32.patch b/patches/kernel/0110-x86-cpuid-Replace-set-clear_bit32.patch

new file mode 100644 (file)

index 0000000..7e56665
--- /dev/null
+++ b/patches/kernel/0110-x86-cpuid-Replace-set-clear_bit32.patch
@@ -0,0 +1,71 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 2 Nov 2017 13:22:35 +0100
+Subject: [PATCH] x86/cpuid: Replace set/clear_bit32()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Peter pointed out that the set/clear_bit32() variants are broken in various
+aspects.
+
+Replace them with open coded set/clear_bit() and type cast
+cpu_info::x86_capability as it's done in all other places throughout x86.
+
+Fixes: 0b00de857a64 ("x86/cpuid: Add generic table for CPUID dependencies")
+Reported-by: Peter Ziljstra <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andi Kleen <ak@linux.intel.com>
+(cherry picked from commit 06dd688ddda5819025e014b79aea9af6ab475fa2)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 3e511952bc3ff9b233d418b0a75a8331deb08171)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/cpu/cpuid-deps.c | 26 +++++++++++---------------
+ 1 file changed, 11 insertions(+), 15 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
+index c21f22d836ad..904b0a3c4e53 100644
+--- a/arch/x86/kernel/cpu/cpuid-deps.c
++++ b/arch/x86/kernel/cpu/cpuid-deps.c
+@@ -62,23 +62,19 @@ const static struct cpuid_dep cpuid_deps[] = {
+       {}
+ };
+ 
+-static inline void __clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit)
+-{
+-      clear_bit32(bit, c->x86_capability);
+-}
+-
+-static inline void __setup_clear_cpu_cap(unsigned int bit)
+-{
+-      clear_cpu_cap(&boot_cpu_data, bit);
+-      set_bit32(bit, cpu_caps_cleared);
+-}
+-
+ static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
+ {
+-      if (!c)
+-              __setup_clear_cpu_cap(feature);
+-      else
+-              __clear_cpu_cap(c, feature);
++      /*
++       * Note: This could use the non atomic __*_bit() variants, but the
++       * rest of the cpufeature code uses atomics as well, so keep it for
++       * consistency. Cleanup all of it separately.
++       */
++      if (!c) {
++              clear_cpu_cap(&boot_cpu_data, feature);
++              set_bit(feature, (unsigned long *)cpu_caps_cleared);
++      } else {
++              clear_bit(feature, (unsigned long *)c->x86_capability);
++      }
+ }
+ 
+ /* Take the capabilities and the BUG bits into account */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0111-bitops-Revert-cbe96375025e-bitops-Add-clear-set_bit3.patch b/patches/kernel/0111-bitops-Revert-cbe96375025e-bitops-Add-clear-set_bit3.patch

new file mode 100644 (file)

index 0000000..e5d86e8
--- /dev/null
+++ b/patches/kernel/0111-bitops-Revert-cbe96375025e-bitops-Add-clear-set_bit3.patch
@@ -0,0 +1,67 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 2 Nov 2017 13:30:03 +0100
+Subject: [PATCH] bitops: Revert cbe96375025e ("bitops: Add clear/set_bit32()
+ to linux/bitops.h")
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+These ops are not endian safe and may break on architectures which have
+aligment requirements.
+
+Reverts: cbe96375025e ("bitops: Add clear/set_bit32() to linux/bitops.h")
+Reported-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andi Kleen <ak@linux.intel.com>
+(cherry picked from commit 1943dc07b45e347c52c1bfdd4a37e04a86e399aa)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit adb64d8c852206281ea6ee6590ae35076a219409)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ include/linux/bitops.h | 26 --------------------------
+ 1 file changed, 26 deletions(-)
+
+diff --git a/include/linux/bitops.h b/include/linux/bitops.h
+index eb257a96db6d..a83c822c35c2 100644
+--- a/include/linux/bitops.h
++++ b/include/linux/bitops.h
+@@ -226,32 +226,6 @@ static inline unsigned long __ffs64(u64 word)
+       return __ffs((unsigned long)word);
+ }
+ 
+-/*
+- * clear_bit32 - Clear a bit in memory for u32 array
+- * @nr: Bit to clear
+- * @addr: u32 * address of bitmap
+- *
+- * Same as clear_bit, but avoids needing casts for u32 arrays.
+- */
+-
+-static __always_inline void clear_bit32(long nr, volatile u32 *addr)
+-{
+-      clear_bit(nr, (volatile unsigned long *)addr);
+-}
+-
+-/*
+- * set_bit32 - Set a bit in memory for u32 array
+- * @nr: Bit to clear
+- * @addr: u32 * address of bitmap
+- *
+- * Same as set_bit, but avoids needing casts for u32 arrays.
+- */
+-
+-static __always_inline void set_bit32(long nr, volatile u32 *addr)
+-{
+-      set_bit(nr, (volatile unsigned long *)addr);
+-}
+-
+ #ifdef __KERNEL__
+ 
+ #ifndef set_mask_bits
+-- 
+2.14.2
+
diff --git a/patches/kernel/0111-x86-mm-Define-_PAGE_TABLE-using-_KERNPG_TABLE.patch b/patches/kernel/0111-x86-mm-Define-_PAGE_TABLE-using-_KERNPG_TABLE.patch

deleted file mode 100644 (file)

index f92c75e..0000000
--- a/patches/kernel/0111-x86-mm-Define-_PAGE_TABLE-using-_KERNPG_TABLE.patch
+++ /dev/null
@@ -1,48 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Borislav Petkov <bp@suse.de>
-Date: Fri, 3 Nov 2017 11:20:28 +0100
-Subject: [PATCH] x86/mm: Define _PAGE_TABLE using _KERNPG_TABLE
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-... so that the difference is obvious.
-
-No functionality change.
-
-Signed-off-by: Borislav Petkov <bp@suse.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/20171103102028.20284-1-bp@alien8.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit c7da092a1f243bfd1bfb4124f538e69e941882da)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 8c69b0c03cd24576ac69c36ede00afae76bab464)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/pgtable_types.h | 3 +--
- 1 file changed, 1 insertion(+), 2 deletions(-)
-
-diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
-index bf9638e1ee42..01f6dc938ccb 100644
---- a/arch/x86/include/asm/pgtable_types.h
-+++ b/arch/x86/include/asm/pgtable_types.h
-@@ -121,10 +121,9 @@
- 
- #define _PAGE_PROTNONE        (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
- 
--#define _PAGE_TABLE   (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |        \
--                       _PAGE_ACCESSED | _PAGE_DIRTY)
- #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |    \
-                        _PAGE_DIRTY)
-+#define _PAGE_TABLE   (_KERNPG_TABLE | _PAGE_USER)
- 
- /*
-  * Set of bits not changed in pte_modify.  The pte's
--- 
-2.14.2
-
diff --git a/patches/kernel/0112-x86-cpufeatures-Re-tabulate-the-X86_FEATURE-definiti.patch b/patches/kernel/0112-x86-cpufeatures-Re-tabulate-the-X86_FEATURE-definiti.patch

deleted file mode 100644 (file)

index 3883aa5..0000000
--- a/patches/kernel/0112-x86-cpufeatures-Re-tabulate-the-X86_FEATURE-definiti.patch
+++ /dev/null
@@ -1,623 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Ingo Molnar <mingo@kernel.org>
-Date: Tue, 31 Oct 2017 13:17:22 +0100
-Subject: [PATCH] x86/cpufeatures: Re-tabulate the X86_FEATURE definitions
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Over the years asm/cpufeatures.h has become somewhat of a mess: the original
-tabulation style was too narrow, while x86 feature names also kept growing
-in length, creating frequent field width overflows.
-
-Re-tabulate it to make it wider and easier to read/modify. Also harmonize
-the tabulation of the other defines in this file to match it.
-
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Andy Lutomirski <luto@amacapital.net>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/20171031121723.28524-3-mingo@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit acbc845ffefd9fb70466182cd8555a26189462b2)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit df7c6e7b62274889a028357a579acfb2215c3f98)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/cpufeatures.h | 506 +++++++++++++++++++------------------
- 1 file changed, 254 insertions(+), 252 deletions(-)
-
-diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
-index c465bd6613ed..a021b0756af6 100644
---- a/arch/x86/include/asm/cpufeatures.h
-+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -12,8 +12,8 @@
- /*
-  * Defines x86 CPU feature bits
-  */
--#define NCAPINTS      18      /* N 32-bit words worth of info */
--#define NBUGINTS      1       /* N 32-bit bug flags */
-+#define NCAPINTS                      18         /* N 32-bit words worth of info */
-+#define NBUGINTS                      1          /* N 32-bit bug flags */
- 
- /*
-  * Note: If the comment begins with a quoted string, that string is used
-@@ -27,163 +27,163 @@
-  */
- 
- /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
--#define X86_FEATURE_FPU               ( 0*32+ 0) /* Onboard FPU */
--#define X86_FEATURE_VME               ( 0*32+ 1) /* Virtual Mode Extensions */
--#define X86_FEATURE_DE                ( 0*32+ 2) /* Debugging Extensions */
--#define X86_FEATURE_PSE               ( 0*32+ 3) /* Page Size Extensions */
--#define X86_FEATURE_TSC               ( 0*32+ 4) /* Time Stamp Counter */
--#define X86_FEATURE_MSR               ( 0*32+ 5) /* Model-Specific Registers */
--#define X86_FEATURE_PAE               ( 0*32+ 6) /* Physical Address Extensions */
--#define X86_FEATURE_MCE               ( 0*32+ 7) /* Machine Check Exception */
--#define X86_FEATURE_CX8               ( 0*32+ 8) /* CMPXCHG8 instruction */
--#define X86_FEATURE_APIC      ( 0*32+ 9) /* Onboard APIC */
--#define X86_FEATURE_SEP               ( 0*32+11) /* SYSENTER/SYSEXIT */
--#define X86_FEATURE_MTRR      ( 0*32+12) /* Memory Type Range Registers */
--#define X86_FEATURE_PGE               ( 0*32+13) /* Page Global Enable */
--#define X86_FEATURE_MCA               ( 0*32+14) /* Machine Check Architecture */
--#define X86_FEATURE_CMOV      ( 0*32+15) /* CMOV instructions */
-+#define X86_FEATURE_FPU                       ( 0*32+ 0) /* Onboard FPU */
-+#define X86_FEATURE_VME                       ( 0*32+ 1) /* Virtual Mode Extensions */
-+#define X86_FEATURE_DE                        ( 0*32+ 2) /* Debugging Extensions */
-+#define X86_FEATURE_PSE                       ( 0*32+ 3) /* Page Size Extensions */
-+#define X86_FEATURE_TSC                       ( 0*32+ 4) /* Time Stamp Counter */
-+#define X86_FEATURE_MSR                       ( 0*32+ 5) /* Model-Specific Registers */
-+#define X86_FEATURE_PAE                       ( 0*32+ 6) /* Physical Address Extensions */
-+#define X86_FEATURE_MCE                       ( 0*32+ 7) /* Machine Check Exception */
-+#define X86_FEATURE_CX8                       ( 0*32+ 8) /* CMPXCHG8 instruction */
-+#define X86_FEATURE_APIC              ( 0*32+ 9) /* Onboard APIC */
-+#define X86_FEATURE_SEP                       ( 0*32+11) /* SYSENTER/SYSEXIT */
-+#define X86_FEATURE_MTRR              ( 0*32+12) /* Memory Type Range Registers */
-+#define X86_FEATURE_PGE                       ( 0*32+13) /* Page Global Enable */
-+#define X86_FEATURE_MCA                       ( 0*32+14) /* Machine Check Architecture */
-+#define X86_FEATURE_CMOV              ( 0*32+15) /* CMOV instructions */
-                                         /* (plus FCMOVcc, FCOMI with FPU) */
--#define X86_FEATURE_PAT               ( 0*32+16) /* Page Attribute Table */
--#define X86_FEATURE_PSE36     ( 0*32+17) /* 36-bit PSEs */
--#define X86_FEATURE_PN                ( 0*32+18) /* Processor serial number */
--#define X86_FEATURE_CLFLUSH   ( 0*32+19) /* CLFLUSH instruction */
--#define X86_FEATURE_DS                ( 0*32+21) /* "dts" Debug Store */
--#define X86_FEATURE_ACPI      ( 0*32+22) /* ACPI via MSR */
--#define X86_FEATURE_MMX               ( 0*32+23) /* Multimedia Extensions */
--#define X86_FEATURE_FXSR      ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
--#define X86_FEATURE_XMM               ( 0*32+25) /* "sse" */
--#define X86_FEATURE_XMM2      ( 0*32+26) /* "sse2" */
--#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
--#define X86_FEATURE_HT                ( 0*32+28) /* Hyper-Threading */
--#define X86_FEATURE_ACC               ( 0*32+29) /* "tm" Automatic clock control */
--#define X86_FEATURE_IA64      ( 0*32+30) /* IA-64 processor */
--#define X86_FEATURE_PBE               ( 0*32+31) /* Pending Break Enable */
-+#define X86_FEATURE_PAT                       ( 0*32+16) /* Page Attribute Table */
-+#define X86_FEATURE_PSE36             ( 0*32+17) /* 36-bit PSEs */
-+#define X86_FEATURE_PN                        ( 0*32+18) /* Processor serial number */
-+#define X86_FEATURE_CLFLUSH           ( 0*32+19) /* CLFLUSH instruction */
-+#define X86_FEATURE_DS                        ( 0*32+21) /* "dts" Debug Store */
-+#define X86_FEATURE_ACPI              ( 0*32+22) /* ACPI via MSR */
-+#define X86_FEATURE_MMX                       ( 0*32+23) /* Multimedia Extensions */
-+#define X86_FEATURE_FXSR              ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
-+#define X86_FEATURE_XMM                       ( 0*32+25) /* "sse" */
-+#define X86_FEATURE_XMM2              ( 0*32+26) /* "sse2" */
-+#define X86_FEATURE_SELFSNOOP         ( 0*32+27) /* "ss" CPU self snoop */
-+#define X86_FEATURE_HT                        ( 0*32+28) /* Hyper-Threading */
-+#define X86_FEATURE_ACC                       ( 0*32+29) /* "tm" Automatic clock control */
-+#define X86_FEATURE_IA64              ( 0*32+30) /* IA-64 processor */
-+#define X86_FEATURE_PBE                       ( 0*32+31) /* Pending Break Enable */
- 
- /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
- /* Don't duplicate feature flags which are redundant with Intel! */
--#define X86_FEATURE_SYSCALL   ( 1*32+11) /* SYSCALL/SYSRET */
--#define X86_FEATURE_MP                ( 1*32+19) /* MP Capable. */
--#define X86_FEATURE_NX                ( 1*32+20) /* Execute Disable */
--#define X86_FEATURE_MMXEXT    ( 1*32+22) /* AMD MMX extensions */
--#define X86_FEATURE_FXSR_OPT  ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
--#define X86_FEATURE_GBPAGES   ( 1*32+26) /* "pdpe1gb" GB pages */
--#define X86_FEATURE_RDTSCP    ( 1*32+27) /* RDTSCP */
--#define X86_FEATURE_LM                ( 1*32+29) /* Long Mode (x86-64) */
--#define X86_FEATURE_3DNOWEXT  ( 1*32+30) /* AMD 3DNow! extensions */
--#define X86_FEATURE_3DNOW     ( 1*32+31) /* 3DNow! */
-+#define X86_FEATURE_SYSCALL           ( 1*32+11) /* SYSCALL/SYSRET */
-+#define X86_FEATURE_MP                        ( 1*32+19) /* MP Capable. */
-+#define X86_FEATURE_NX                        ( 1*32+20) /* Execute Disable */
-+#define X86_FEATURE_MMXEXT            ( 1*32+22) /* AMD MMX extensions */
-+#define X86_FEATURE_FXSR_OPT          ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
-+#define X86_FEATURE_GBPAGES           ( 1*32+26) /* "pdpe1gb" GB pages */
-+#define X86_FEATURE_RDTSCP            ( 1*32+27) /* RDTSCP */
-+#define X86_FEATURE_LM                        ( 1*32+29) /* Long Mode (x86-64) */
-+#define X86_FEATURE_3DNOWEXT          ( 1*32+30) /* AMD 3DNow! extensions */
-+#define X86_FEATURE_3DNOW             ( 1*32+31) /* 3DNow! */
- 
- /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
--#define X86_FEATURE_RECOVERY  ( 2*32+ 0) /* CPU in recovery mode */
--#define X86_FEATURE_LONGRUN   ( 2*32+ 1) /* Longrun power control */
--#define X86_FEATURE_LRTI      ( 2*32+ 3) /* LongRun table interface */
-+#define X86_FEATURE_RECOVERY          ( 2*32+ 0) /* CPU in recovery mode */
-+#define X86_FEATURE_LONGRUN           ( 2*32+ 1) /* Longrun power control */
-+#define X86_FEATURE_LRTI              ( 2*32+ 3) /* LongRun table interface */
- 
- /* Other features, Linux-defined mapping, word 3 */
- /* This range is used for feature bits which conflict or are synthesized */
--#define X86_FEATURE_CXMMX     ( 3*32+ 0) /* Cyrix MMX extensions */
--#define X86_FEATURE_K6_MTRR   ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
--#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
--#define X86_FEATURE_CENTAUR_MCR       ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-+#define X86_FEATURE_CXMMX             ( 3*32+ 0) /* Cyrix MMX extensions */
-+#define X86_FEATURE_K6_MTRR           ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-+#define X86_FEATURE_CYRIX_ARR         ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-+#define X86_FEATURE_CENTAUR_MCR               ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
- /* cpu types for specific tunings: */
--#define X86_FEATURE_K8                ( 3*32+ 4) /* "" Opteron, Athlon64 */
--#define X86_FEATURE_K7                ( 3*32+ 5) /* "" Athlon */
--#define X86_FEATURE_P3                ( 3*32+ 6) /* "" P3 */
--#define X86_FEATURE_P4                ( 3*32+ 7) /* "" P4 */
--#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
--#define X86_FEATURE_UP                ( 3*32+ 9) /* smp kernel running on up */
--#define X86_FEATURE_ART               ( 3*32+10) /* Platform has always running timer (ART) */
--#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
--#define X86_FEATURE_PEBS      ( 3*32+12) /* Precise-Event Based Sampling */
--#define X86_FEATURE_BTS               ( 3*32+13) /* Branch Trace Store */
--#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
--#define X86_FEATURE_SYSENTER32        ( 3*32+15) /* "" sysenter in ia32 userspace */
--#define X86_FEATURE_REP_GOOD  ( 3*32+16) /* rep microcode works well */
--#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
--#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
--#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
--#define X86_FEATURE_NOPL      ( 3*32+20) /* The NOPL (0F 1F) instructions */
--#define X86_FEATURE_ALWAYS    ( 3*32+21) /* "" Always-present feature */
--#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
--#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
--#define X86_FEATURE_NONSTOP_TSC       ( 3*32+24) /* TSC does not stop in C states */
--#define X86_FEATURE_CPUID     ( 3*32+25) /* CPU has CPUID instruction itself */
--#define X86_FEATURE_EXTD_APICID       ( 3*32+26) /* has extended APICID (8 bits) */
--#define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
--#define X86_FEATURE_APERFMPERF        ( 3*32+28) /* APERFMPERF */
--#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
--#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
-+#define X86_FEATURE_K8                        ( 3*32+ 4) /* "" Opteron, Athlon64 */
-+#define X86_FEATURE_K7                        ( 3*32+ 5) /* "" Athlon */
-+#define X86_FEATURE_P3                        ( 3*32+ 6) /* "" P3 */
-+#define X86_FEATURE_P4                        ( 3*32+ 7) /* "" P4 */
-+#define X86_FEATURE_CONSTANT_TSC      ( 3*32+ 8) /* TSC ticks at a constant rate */
-+#define X86_FEATURE_UP                        ( 3*32+ 9) /* smp kernel running on up */
-+#define X86_FEATURE_ART                       ( 3*32+10) /* Platform has always running timer (ART) */
-+#define X86_FEATURE_ARCH_PERFMON      ( 3*32+11) /* Intel Architectural PerfMon */
-+#define X86_FEATURE_PEBS              ( 3*32+12) /* Precise-Event Based Sampling */
-+#define X86_FEATURE_BTS                       ( 3*32+13) /* Branch Trace Store */
-+#define X86_FEATURE_SYSCALL32         ( 3*32+14) /* "" syscall in ia32 userspace */
-+#define X86_FEATURE_SYSENTER32                ( 3*32+15) /* "" sysenter in ia32 userspace */
-+#define X86_FEATURE_REP_GOOD          ( 3*32+16) /* rep microcode works well */
-+#define X86_FEATURE_MFENCE_RDTSC      ( 3*32+17) /* "" Mfence synchronizes RDTSC */
-+#define X86_FEATURE_LFENCE_RDTSC      ( 3*32+18) /* "" Lfence synchronizes RDTSC */
-+#define X86_FEATURE_ACC_POWER         ( 3*32+19) /* AMD Accumulated Power Mechanism */
-+#define X86_FEATURE_NOPL              ( 3*32+20) /* The NOPL (0F 1F) instructions */
-+#define X86_FEATURE_ALWAYS            ( 3*32+21) /* "" Always-present feature */
-+#define X86_FEATURE_XTOPOLOGY         ( 3*32+22) /* cpu topology enum extensions */
-+#define X86_FEATURE_TSC_RELIABLE      ( 3*32+23) /* TSC is known to be reliable */
-+#define X86_FEATURE_NONSTOP_TSC               ( 3*32+24) /* TSC does not stop in C states */
-+#define X86_FEATURE_CPUID             ( 3*32+25) /* CPU has CPUID instruction itself */
-+#define X86_FEATURE_EXTD_APICID               ( 3*32+26) /* has extended APICID (8 bits) */
-+#define X86_FEATURE_AMD_DCM           ( 3*32+27) /* multi-node processor */
-+#define X86_FEATURE_APERFMPERF                ( 3*32+28) /* APERFMPERF */
-+#define X86_FEATURE_NONSTOP_TSC_S3    ( 3*32+30) /* TSC doesn't stop in S3 state */
-+#define X86_FEATURE_TSC_KNOWN_FREQ    ( 3*32+31) /* TSC has known frequency */
- 
- /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
--#define X86_FEATURE_XMM3      ( 4*32+ 0) /* "pni" SSE-3 */
--#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
--#define X86_FEATURE_DTES64    ( 4*32+ 2) /* 64-bit Debug Store */
--#define X86_FEATURE_MWAIT     ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
--#define X86_FEATURE_DSCPL     ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
--#define X86_FEATURE_VMX               ( 4*32+ 5) /* Hardware virtualization */
--#define X86_FEATURE_SMX               ( 4*32+ 6) /* Safer mode */
--#define X86_FEATURE_EST               ( 4*32+ 7) /* Enhanced SpeedStep */
--#define X86_FEATURE_TM2               ( 4*32+ 8) /* Thermal Monitor 2 */
--#define X86_FEATURE_SSSE3     ( 4*32+ 9) /* Supplemental SSE-3 */
--#define X86_FEATURE_CID               ( 4*32+10) /* Context ID */
--#define X86_FEATURE_SDBG      ( 4*32+11) /* Silicon Debug */
--#define X86_FEATURE_FMA               ( 4*32+12) /* Fused multiply-add */
--#define X86_FEATURE_CX16      ( 4*32+13) /* CMPXCHG16B */
--#define X86_FEATURE_XTPR      ( 4*32+14) /* Send Task Priority Messages */
--#define X86_FEATURE_PDCM      ( 4*32+15) /* Performance Capabilities */
--#define X86_FEATURE_PCID      ( 4*32+17) /* Process Context Identifiers */
--#define X86_FEATURE_DCA               ( 4*32+18) /* Direct Cache Access */
--#define X86_FEATURE_XMM4_1    ( 4*32+19) /* "sse4_1" SSE-4.1 */
--#define X86_FEATURE_XMM4_2    ( 4*32+20) /* "sse4_2" SSE-4.2 */
--#define X86_FEATURE_X2APIC    ( 4*32+21) /* x2APIC */
--#define X86_FEATURE_MOVBE     ( 4*32+22) /* MOVBE instruction */
--#define X86_FEATURE_POPCNT      ( 4*32+23) /* POPCNT instruction */
-+#define X86_FEATURE_XMM3              ( 4*32+ 0) /* "pni" SSE-3 */
-+#define X86_FEATURE_PCLMULQDQ         ( 4*32+ 1) /* PCLMULQDQ instruction */
-+#define X86_FEATURE_DTES64            ( 4*32+ 2) /* 64-bit Debug Store */
-+#define X86_FEATURE_MWAIT             ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
-+#define X86_FEATURE_DSCPL             ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
-+#define X86_FEATURE_VMX                       ( 4*32+ 5) /* Hardware virtualization */
-+#define X86_FEATURE_SMX                       ( 4*32+ 6) /* Safer mode */
-+#define X86_FEATURE_EST                       ( 4*32+ 7) /* Enhanced SpeedStep */
-+#define X86_FEATURE_TM2                       ( 4*32+ 8) /* Thermal Monitor 2 */
-+#define X86_FEATURE_SSSE3             ( 4*32+ 9) /* Supplemental SSE-3 */
-+#define X86_FEATURE_CID                       ( 4*32+10) /* Context ID */
-+#define X86_FEATURE_SDBG              ( 4*32+11) /* Silicon Debug */
-+#define X86_FEATURE_FMA                       ( 4*32+12) /* Fused multiply-add */
-+#define X86_FEATURE_CX16              ( 4*32+13) /* CMPXCHG16B */
-+#define X86_FEATURE_XTPR              ( 4*32+14) /* Send Task Priority Messages */
-+#define X86_FEATURE_PDCM              ( 4*32+15) /* Performance Capabilities */
-+#define X86_FEATURE_PCID              ( 4*32+17) /* Process Context Identifiers */
-+#define X86_FEATURE_DCA                       ( 4*32+18) /* Direct Cache Access */
-+#define X86_FEATURE_XMM4_1            ( 4*32+19) /* "sse4_1" SSE-4.1 */
-+#define X86_FEATURE_XMM4_2            ( 4*32+20) /* "sse4_2" SSE-4.2 */
-+#define X86_FEATURE_X2APIC            ( 4*32+21) /* x2APIC */
-+#define X86_FEATURE_MOVBE             ( 4*32+22) /* MOVBE instruction */
-+#define X86_FEATURE_POPCNT            ( 4*32+23) /* POPCNT instruction */
- #define X86_FEATURE_TSC_DEADLINE_TIMER        ( 4*32+24) /* Tsc deadline timer */
--#define X86_FEATURE_AES               ( 4*32+25) /* AES instructions */
--#define X86_FEATURE_XSAVE     ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
--#define X86_FEATURE_OSXSAVE   ( 4*32+27) /* "" XSAVE enabled in the OS */
--#define X86_FEATURE_AVX               ( 4*32+28) /* Advanced Vector Extensions */
--#define X86_FEATURE_F16C      ( 4*32+29) /* 16-bit fp conversions */
--#define X86_FEATURE_RDRAND    ( 4*32+30) /* The RDRAND instruction */
--#define X86_FEATURE_HYPERVISOR        ( 4*32+31) /* Running on a hypervisor */
-+#define X86_FEATURE_AES                       ( 4*32+25) /* AES instructions */
-+#define X86_FEATURE_XSAVE             ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-+#define X86_FEATURE_OSXSAVE           ( 4*32+27) /* "" XSAVE enabled in the OS */
-+#define X86_FEATURE_AVX                       ( 4*32+28) /* Advanced Vector Extensions */
-+#define X86_FEATURE_F16C              ( 4*32+29) /* 16-bit fp conversions */
-+#define X86_FEATURE_RDRAND            ( 4*32+30) /* The RDRAND instruction */
-+#define X86_FEATURE_HYPERVISOR                ( 4*32+31) /* Running on a hypervisor */
- 
- /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
--#define X86_FEATURE_XSTORE    ( 5*32+ 2) /* "rng" RNG present (xstore) */
--#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
--#define X86_FEATURE_XCRYPT    ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
--#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
--#define X86_FEATURE_ACE2      ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
--#define X86_FEATURE_ACE2_EN   ( 5*32+ 9) /* ACE v2 enabled */
--#define X86_FEATURE_PHE               ( 5*32+10) /* PadLock Hash Engine */
--#define X86_FEATURE_PHE_EN    ( 5*32+11) /* PHE enabled */
--#define X86_FEATURE_PMM               ( 5*32+12) /* PadLock Montgomery Multiplier */
--#define X86_FEATURE_PMM_EN    ( 5*32+13) /* PMM enabled */
-+#define X86_FEATURE_XSTORE            ( 5*32+ 2) /* "rng" RNG present (xstore) */
-+#define X86_FEATURE_XSTORE_EN         ( 5*32+ 3) /* "rng_en" RNG enabled */
-+#define X86_FEATURE_XCRYPT            ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
-+#define X86_FEATURE_XCRYPT_EN         ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-+#define X86_FEATURE_ACE2              ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-+#define X86_FEATURE_ACE2_EN           ( 5*32+ 9) /* ACE v2 enabled */
-+#define X86_FEATURE_PHE                       ( 5*32+10) /* PadLock Hash Engine */
-+#define X86_FEATURE_PHE_EN            ( 5*32+11) /* PHE enabled */
-+#define X86_FEATURE_PMM                       ( 5*32+12) /* PadLock Montgomery Multiplier */
-+#define X86_FEATURE_PMM_EN            ( 5*32+13) /* PMM enabled */
- 
- /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
--#define X86_FEATURE_LAHF_LM   ( 6*32+ 0) /* LAHF/SAHF in long mode */
--#define X86_FEATURE_CMP_LEGACY        ( 6*32+ 1) /* If yes HyperThreading not valid */
--#define X86_FEATURE_SVM               ( 6*32+ 2) /* Secure virtual machine */
--#define X86_FEATURE_EXTAPIC   ( 6*32+ 3) /* Extended APIC space */
--#define X86_FEATURE_CR8_LEGACY        ( 6*32+ 4) /* CR8 in 32-bit mode */
--#define X86_FEATURE_ABM               ( 6*32+ 5) /* Advanced bit manipulation */
--#define X86_FEATURE_SSE4A     ( 6*32+ 6) /* SSE-4A */
--#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
--#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
--#define X86_FEATURE_OSVW      ( 6*32+ 9) /* OS Visible Workaround */
--#define X86_FEATURE_IBS               ( 6*32+10) /* Instruction Based Sampling */
--#define X86_FEATURE_XOP               ( 6*32+11) /* extended AVX instructions */
--#define X86_FEATURE_SKINIT    ( 6*32+12) /* SKINIT/STGI instructions */
--#define X86_FEATURE_WDT               ( 6*32+13) /* Watchdog timer */
--#define X86_FEATURE_LWP               ( 6*32+15) /* Light Weight Profiling */
--#define X86_FEATURE_FMA4      ( 6*32+16) /* 4 operands MAC instructions */
--#define X86_FEATURE_TCE               ( 6*32+17) /* translation cache extension */
--#define X86_FEATURE_NODEID_MSR        ( 6*32+19) /* NodeId MSR */
--#define X86_FEATURE_TBM               ( 6*32+21) /* trailing bit manipulations */
--#define X86_FEATURE_TOPOEXT   ( 6*32+22) /* topology extensions CPUID leafs */
--#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
--#define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
--#define X86_FEATURE_BPEXT     (6*32+26) /* data breakpoint extension */
--#define X86_FEATURE_PTSC      ( 6*32+27) /* performance time-stamp counter */
--#define X86_FEATURE_PERFCTR_L2        ( 6*32+28) /* L2 performance counter extensions */
--#define X86_FEATURE_MWAITX    ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
-+#define X86_FEATURE_LAHF_LM           ( 6*32+ 0) /* LAHF/SAHF in long mode */
-+#define X86_FEATURE_CMP_LEGACY                ( 6*32+ 1) /* If yes HyperThreading not valid */
-+#define X86_FEATURE_SVM                       ( 6*32+ 2) /* Secure virtual machine */
-+#define X86_FEATURE_EXTAPIC           ( 6*32+ 3) /* Extended APIC space */
-+#define X86_FEATURE_CR8_LEGACY                ( 6*32+ 4) /* CR8 in 32-bit mode */
-+#define X86_FEATURE_ABM                       ( 6*32+ 5) /* Advanced bit manipulation */
-+#define X86_FEATURE_SSE4A             ( 6*32+ 6) /* SSE-4A */
-+#define X86_FEATURE_MISALIGNSSE               ( 6*32+ 7) /* Misaligned SSE mode */
-+#define X86_FEATURE_3DNOWPREFETCH     ( 6*32+ 8) /* 3DNow prefetch instructions */
-+#define X86_FEATURE_OSVW              ( 6*32+ 9) /* OS Visible Workaround */
-+#define X86_FEATURE_IBS                       ( 6*32+10) /* Instruction Based Sampling */
-+#define X86_FEATURE_XOP                       ( 6*32+11) /* extended AVX instructions */
-+#define X86_FEATURE_SKINIT            ( 6*32+12) /* SKINIT/STGI instructions */
-+#define X86_FEATURE_WDT                       ( 6*32+13) /* Watchdog timer */
-+#define X86_FEATURE_LWP                       ( 6*32+15) /* Light Weight Profiling */
-+#define X86_FEATURE_FMA4              ( 6*32+16) /* 4 operands MAC instructions */
-+#define X86_FEATURE_TCE                       ( 6*32+17) /* translation cache extension */
-+#define X86_FEATURE_NODEID_MSR                ( 6*32+19) /* NodeId MSR */
-+#define X86_FEATURE_TBM                       ( 6*32+21) /* trailing bit manipulations */
-+#define X86_FEATURE_TOPOEXT           ( 6*32+22) /* topology extensions CPUID leafs */
-+#define X86_FEATURE_PERFCTR_CORE      ( 6*32+23) /* core performance counter extensions */
-+#define X86_FEATURE_PERFCTR_NB                ( 6*32+24) /* NB performance counter extensions */
-+#define X86_FEATURE_BPEXT             (6*32+26) /* data breakpoint extension */
-+#define X86_FEATURE_PTSC              ( 6*32+27) /* performance time-stamp counter */
-+#define X86_FEATURE_PERFCTR_L2                ( 6*32+28) /* Last Level Cache performance counter extensions */
-+#define X86_FEATURE_MWAITX            ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
- 
- /*
-  * Auxiliary flags: Linux defined - For features scattered in various
-@@ -191,150 +191,152 @@
-  *
-  * Reuse free bits when adding new feature flags!
-  */
--#define X86_FEATURE_RING3MWAIT        ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
--#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
--#define X86_FEATURE_CPB               ( 7*32+ 2) /* AMD Core Performance Boost */
--#define X86_FEATURE_EPB               ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
--#define X86_FEATURE_CAT_L3    ( 7*32+ 4) /* Cache Allocation Technology L3 */
--#define X86_FEATURE_CAT_L2    ( 7*32+ 5) /* Cache Allocation Technology L2 */
--#define X86_FEATURE_CDP_L3    ( 7*32+ 6) /* Code and Data Prioritization L3 */
-+#define X86_FEATURE_RING3MWAIT                ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
-+#define X86_FEATURE_CPUID_FAULT               ( 7*32+ 1) /* Intel CPUID faulting */
-+#define X86_FEATURE_CPB                       ( 7*32+ 2) /* AMD Core Performance Boost */
-+#define X86_FEATURE_EPB                       ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-+#define X86_FEATURE_CAT_L3            ( 7*32+ 4) /* Cache Allocation Technology L3 */
-+#define X86_FEATURE_CAT_L2            ( 7*32+ 5) /* Cache Allocation Technology L2 */
-+#define X86_FEATURE_CDP_L3            ( 7*32+ 6) /* Code and Data Prioritization L3 */
- 
--#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
--#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-+#define X86_FEATURE_HW_PSTATE         ( 7*32+ 8) /* AMD HW-PState */
-+#define X86_FEATURE_PROC_FEEDBACK     ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-+#define X86_FEATURE_SME                       ( 7*32+10) /* AMD Secure Memory Encryption */
- 
--#define X86_FEATURE_INTEL_PPIN        ( 7*32+14) /* Intel Processor Inventory Number */
--#define X86_FEATURE_INTEL_PT  ( 7*32+15) /* Intel Processor Trace */
--#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
--#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
-+#define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
-+#define X86_FEATURE_INTEL_PT          ( 7*32+15) /* Intel Processor Trace */
-+#define X86_FEATURE_AVX512_4VNNIW     (7*32+16) /* AVX-512 Neural Network Instructions */
-+#define X86_FEATURE_AVX512_4FMAPS     (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
- 
--#define X86_FEATURE_MBA         ( 7*32+18) /* Memory Bandwidth Allocation */
-+#define X86_FEATURE_MBA                       ( 7*32+18) /* Memory Bandwidth Allocation */
- 
- /* Virtualization flags: Linux defined, word 8 */
--#define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
--#define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
--#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
--#define X86_FEATURE_EPT         ( 8*32+ 3) /* Intel Extended Page Table */
--#define X86_FEATURE_VPID        ( 8*32+ 4) /* Intel Virtual Processor ID */
-+#define X86_FEATURE_TPR_SHADOW                ( 8*32+ 0) /* Intel TPR Shadow */
-+#define X86_FEATURE_VNMI              ( 8*32+ 1) /* Intel Virtual NMI */
-+#define X86_FEATURE_FLEXPRIORITY      ( 8*32+ 2) /* Intel FlexPriority */
-+#define X86_FEATURE_EPT                       ( 8*32+ 3) /* Intel Extended Page Table */
-+#define X86_FEATURE_VPID              ( 8*32+ 4) /* Intel Virtual Processor ID */
- 
--#define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
--#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
-+#define X86_FEATURE_VMMCALL           ( 8*32+15) /* Prefer vmmcall to vmcall */
-+#define X86_FEATURE_XENPV             ( 8*32+16) /* "" Xen paravirtual guest */
- 
- 
- /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
--#define X86_FEATURE_FSGSBASE  ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
--#define X86_FEATURE_TSC_ADJUST        ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
--#define X86_FEATURE_BMI1      ( 9*32+ 3) /* 1st group bit manipulation extensions */
--#define X86_FEATURE_HLE               ( 9*32+ 4) /* Hardware Lock Elision */
--#define X86_FEATURE_AVX2      ( 9*32+ 5) /* AVX2 instructions */
--#define X86_FEATURE_SMEP      ( 9*32+ 7) /* Supervisor Mode Execution Protection */
--#define X86_FEATURE_BMI2      ( 9*32+ 8) /* 2nd group bit manipulation extensions */
--#define X86_FEATURE_ERMS      ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
--#define X86_FEATURE_INVPCID   ( 9*32+10) /* Invalidate Processor Context ID */
--#define X86_FEATURE_RTM               ( 9*32+11) /* Restricted Transactional Memory */
--#define X86_FEATURE_CQM               ( 9*32+12) /* Cache QoS Monitoring */
--#define X86_FEATURE_MPX               ( 9*32+14) /* Memory Protection Extension */
--#define X86_FEATURE_RDT_A     ( 9*32+15) /* Resource Director Technology Allocation */
--#define X86_FEATURE_AVX512F   ( 9*32+16) /* AVX-512 Foundation */
--#define X86_FEATURE_AVX512DQ  ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
--#define X86_FEATURE_RDSEED    ( 9*32+18) /* The RDSEED instruction */
--#define X86_FEATURE_ADX               ( 9*32+19) /* The ADCX and ADOX instructions */
--#define X86_FEATURE_SMAP      ( 9*32+20) /* Supervisor Mode Access Prevention */
--#define X86_FEATURE_AVX512IFMA  ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
--#define X86_FEATURE_CLFLUSHOPT        ( 9*32+23) /* CLFLUSHOPT instruction */
--#define X86_FEATURE_CLWB      ( 9*32+24) /* CLWB instruction */
--#define X86_FEATURE_AVX512PF  ( 9*32+26) /* AVX-512 Prefetch */
--#define X86_FEATURE_AVX512ER  ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
--#define X86_FEATURE_AVX512CD  ( 9*32+28) /* AVX-512 Conflict Detection */
--#define X86_FEATURE_SHA_NI    ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
--#define X86_FEATURE_AVX512BW  ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
--#define X86_FEATURE_AVX512VL  ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
-+#define X86_FEATURE_FSGSBASE          ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
-+#define X86_FEATURE_TSC_ADJUST                ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
-+#define X86_FEATURE_BMI1              ( 9*32+ 3) /* 1st group bit manipulation extensions */
-+#define X86_FEATURE_HLE                       ( 9*32+ 4) /* Hardware Lock Elision */
-+#define X86_FEATURE_AVX2              ( 9*32+ 5) /* AVX2 instructions */
-+#define X86_FEATURE_SMEP              ( 9*32+ 7) /* Supervisor Mode Execution Protection */
-+#define X86_FEATURE_BMI2              ( 9*32+ 8) /* 2nd group bit manipulation extensions */
-+#define X86_FEATURE_ERMS              ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
-+#define X86_FEATURE_INVPCID           ( 9*32+10) /* Invalidate Processor Context ID */
-+#define X86_FEATURE_RTM                       ( 9*32+11) /* Restricted Transactional Memory */
-+#define X86_FEATURE_CQM                       ( 9*32+12) /* Cache QoS Monitoring */
-+#define X86_FEATURE_MPX                       ( 9*32+14) /* Memory Protection Extension */
-+#define X86_FEATURE_RDT_A             ( 9*32+15) /* Resource Director Technology Allocation */
-+#define X86_FEATURE_AVX512F           ( 9*32+16) /* AVX-512 Foundation */
-+#define X86_FEATURE_AVX512DQ          ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
-+#define X86_FEATURE_RDSEED            ( 9*32+18) /* The RDSEED instruction */
-+#define X86_FEATURE_ADX                       ( 9*32+19) /* The ADCX and ADOX instructions */
-+#define X86_FEATURE_SMAP              ( 9*32+20) /* Supervisor Mode Access Prevention */
-+#define X86_FEATURE_AVX512IFMA                ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
-+#define X86_FEATURE_CLFLUSHOPT                ( 9*32+23) /* CLFLUSHOPT instruction */
-+#define X86_FEATURE_CLWB              ( 9*32+24) /* CLWB instruction */
-+#define X86_FEATURE_AVX512PF          ( 9*32+26) /* AVX-512 Prefetch */
-+#define X86_FEATURE_AVX512ER          ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-+#define X86_FEATURE_AVX512CD          ( 9*32+28) /* AVX-512 Conflict Detection */
-+#define X86_FEATURE_SHA_NI            ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-+#define X86_FEATURE_AVX512BW          ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
-+#define X86_FEATURE_AVX512VL          ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
- 
- /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
--#define X86_FEATURE_XSAVEOPT  (10*32+ 0) /* XSAVEOPT */
--#define X86_FEATURE_XSAVEC    (10*32+ 1) /* XSAVEC */
--#define X86_FEATURE_XGETBV1   (10*32+ 2) /* XGETBV with ECX = 1 */
--#define X86_FEATURE_XSAVES    (10*32+ 3) /* XSAVES/XRSTORS */
-+#define X86_FEATURE_XSAVEOPT          (10*32+ 0) /* XSAVEOPT */
-+#define X86_FEATURE_XSAVEC            (10*32+ 1) /* XSAVEC */
-+#define X86_FEATURE_XGETBV1           (10*32+ 2) /* XGETBV with ECX = 1 */
-+#define X86_FEATURE_XSAVES            (10*32+ 3) /* XSAVES/XRSTORS */
- 
- /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
--#define X86_FEATURE_CQM_LLC   (11*32+ 1) /* LLC QoS if 1 */
-+#define X86_FEATURE_CQM_LLC           (11*32+ 1) /* LLC QoS if 1 */
- 
- /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
--#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
--#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
--#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
-+#define X86_FEATURE_CQM_OCCUP_LLC     (12*32+ 0) /* LLC occupancy monitoring if 1 */
-+#define X86_FEATURE_CQM_MBM_TOTAL     (12*32+ 1) /* LLC Total MBM monitoring */
-+#define X86_FEATURE_CQM_MBM_LOCAL     (12*32+ 2) /* LLC Local MBM monitoring */
- 
- /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
--#define X86_FEATURE_CLZERO    (13*32+0) /* CLZERO instruction */
--#define X86_FEATURE_IRPERF    (13*32+1) /* Instructions Retired Count */
-+#define X86_FEATURE_CLZERO            (13*32+0) /* CLZERO instruction */
-+#define X86_FEATURE_IRPERF            (13*32+1) /* Instructions Retired Count */
- 
- /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
--#define X86_FEATURE_DTHERM    (14*32+ 0) /* Digital Thermal Sensor */
--#define X86_FEATURE_IDA               (14*32+ 1) /* Intel Dynamic Acceleration */
--#define X86_FEATURE_ARAT      (14*32+ 2) /* Always Running APIC Timer */
--#define X86_FEATURE_PLN               (14*32+ 4) /* Intel Power Limit Notification */
--#define X86_FEATURE_PTS               (14*32+ 6) /* Intel Package Thermal Status */
--#define X86_FEATURE_HWP               (14*32+ 7) /* Intel Hardware P-states */
--#define X86_FEATURE_HWP_NOTIFY        (14*32+ 8) /* HWP Notification */
--#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
--#define X86_FEATURE_HWP_EPP   (14*32+10) /* HWP Energy Perf. Preference */
--#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
-+#define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
-+#define X86_FEATURE_IDA                       (14*32+ 1) /* Intel Dynamic Acceleration */
-+#define X86_FEATURE_ARAT              (14*32+ 2) /* Always Running APIC Timer */
-+#define X86_FEATURE_PLN                       (14*32+ 4) /* Intel Power Limit Notification */
-+#define X86_FEATURE_PTS                       (14*32+ 6) /* Intel Package Thermal Status */
-+#define X86_FEATURE_HWP                       (14*32+ 7) /* Intel Hardware P-states */
-+#define X86_FEATURE_HWP_NOTIFY                (14*32+ 8) /* HWP Notification */
-+#define X86_FEATURE_HWP_ACT_WINDOW    (14*32+ 9) /* HWP Activity Window */
-+#define X86_FEATURE_HWP_EPP           (14*32+10) /* HWP Energy Perf. Preference */
-+#define X86_FEATURE_HWP_PKG_REQ               (14*32+11) /* HWP Package Level Request */
- 
- /* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
--#define X86_FEATURE_NPT               (15*32+ 0) /* Nested Page Table support */
--#define X86_FEATURE_LBRV      (15*32+ 1) /* LBR Virtualization support */
--#define X86_FEATURE_SVML      (15*32+ 2) /* "svm_lock" SVM locking MSR */
--#define X86_FEATURE_NRIPS     (15*32+ 3) /* "nrip_save" SVM next_rip save */
--#define X86_FEATURE_TSCRATEMSR  (15*32+ 4) /* "tsc_scale" TSC scaling support */
--#define X86_FEATURE_VMCBCLEAN   (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
--#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
--#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
--#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
--#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
--#define X86_FEATURE_AVIC      (15*32+13) /* Virtual Interrupt Controller */
--#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
-+#define X86_FEATURE_NPT                       (15*32+ 0) /* Nested Page Table support */
-+#define X86_FEATURE_LBRV              (15*32+ 1) /* LBR Virtualization support */
-+#define X86_FEATURE_SVML              (15*32+ 2) /* "svm_lock" SVM locking MSR */
-+#define X86_FEATURE_NRIPS             (15*32+ 3) /* "nrip_save" SVM next_rip save */
-+#define X86_FEATURE_TSCRATEMSR                (15*32+ 4) /* "tsc_scale" TSC scaling support */
-+#define X86_FEATURE_VMCBCLEAN         (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-+#define X86_FEATURE_FLUSHBYASID               (15*32+ 6) /* flush-by-ASID support */
-+#define X86_FEATURE_DECODEASSISTS     (15*32+ 7) /* Decode Assists support */
-+#define X86_FEATURE_PAUSEFILTER               (15*32+10) /* filtered pause intercept */
-+#define X86_FEATURE_PFTHRESHOLD               (15*32+12) /* pause filter threshold */
-+#define X86_FEATURE_AVIC              (15*32+13) /* Virtual Interrupt Controller */
-+#define X86_FEATURE_V_VMSAVE_VMLOAD   (15*32+15) /* Virtual VMSAVE VMLOAD */
-+#define X86_FEATURE_VGIF              (15*32+16) /* Virtual GIF */
- 
- /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
--#define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
--#define X86_FEATURE_PKU               (16*32+ 3) /* Protection Keys for Userspace */
--#define X86_FEATURE_OSPKE     (16*32+ 4) /* OS Protection Keys Enable */
--#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
--#define X86_FEATURE_GFNI      (16*32+ 8) /* Galois Field New Instructions */
--#define X86_FEATURE_VAES      (16*32+ 9) /* Vector AES */
--#define X86_FEATURE_VPCLMULQDQ        (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
--#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */
--#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
--#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
--#define X86_FEATURE_LA57      (16*32+16) /* 5-level page tables */
--#define X86_FEATURE_RDPID     (16*32+22) /* RDPID instruction */
-+#define X86_FEATURE_AVX512VBMI                (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
-+#define X86_FEATURE_PKU                       (16*32+ 3) /* Protection Keys for Userspace */
-+#define X86_FEATURE_OSPKE             (16*32+ 4) /* OS Protection Keys Enable */
-+#define X86_FEATURE_AVX512_VBMI2      (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
-+#define X86_FEATURE_GFNI              (16*32+ 8) /* Galois Field New Instructions */
-+#define X86_FEATURE_VAES              (16*32+ 9) /* Vector AES */
-+#define X86_FEATURE_VPCLMULQDQ                (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
-+#define X86_FEATURE_AVX512_VNNI               (16*32+ 11) /* Vector Neural Network Instructions */
-+#define X86_FEATURE_AVX512_BITALG     (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
-+#define X86_FEATURE_AVX512_VPOPCNTDQ  (16*32+14) /* POPCNT for vectors of DW/QW */
-+#define X86_FEATURE_LA57              (16*32+16) /* 5-level page tables */
-+#define X86_FEATURE_RDPID             (16*32+22) /* RDPID instruction */
- 
- /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
--#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
--#define X86_FEATURE_SUCCOR    (17*32+1) /* Uncorrectable error containment and recovery */
--#define X86_FEATURE_SMCA      (17*32+3) /* Scalable MCA */
-+#define X86_FEATURE_OVERFLOW_RECOV    (17*32+0) /* MCA overflow recovery support */
-+#define X86_FEATURE_SUCCOR            (17*32+1) /* Uncorrectable error containment and recovery */
-+#define X86_FEATURE_SMCA              (17*32+3) /* Scalable MCA */
- 
- /*
-  * BUG word(s)
-  */
--#define X86_BUG(x)            (NCAPINTS*32 + (x))
-+#define X86_BUG(x)                    (NCAPINTS*32 + (x))
- 
--#define X86_BUG_F00F          X86_BUG(0) /* Intel F00F */
--#define X86_BUG_FDIV          X86_BUG(1) /* FPU FDIV */
--#define X86_BUG_COMA          X86_BUG(2) /* Cyrix 6x86 coma */
--#define X86_BUG_AMD_TLB_MMATCH        X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
--#define X86_BUG_AMD_APIC_C1E  X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
--#define X86_BUG_11AP          X86_BUG(5) /* Bad local APIC aka 11AP */
--#define X86_BUG_FXSAVE_LEAK   X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
--#define X86_BUG_CLFLUSH_MONITOR       X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
--#define X86_BUG_SYSRET_SS_ATTRS       X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
-+#define X86_BUG_F00F                  X86_BUG(0) /* Intel F00F */
-+#define X86_BUG_FDIV                  X86_BUG(1) /* FPU FDIV */
-+#define X86_BUG_COMA                  X86_BUG(2) /* Cyrix 6x86 coma */
-+#define X86_BUG_AMD_TLB_MMATCH                X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
-+#define X86_BUG_AMD_APIC_C1E          X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-+#define X86_BUG_11AP                  X86_BUG(5) /* Bad local APIC aka 11AP */
-+#define X86_BUG_FXSAVE_LEAK           X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-+#define X86_BUG_CLFLUSH_MONITOR               X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-+#define X86_BUG_SYSRET_SS_ATTRS               X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
- #ifdef CONFIG_X86_32
- /*
-  * 64-bit kernels don't use X86_BUG_ESPFIX.  Make the define conditional
-  * to avoid confusion.
-  */
--#define X86_BUG_ESPFIX                X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
-+#define X86_BUG_ESPFIX                        X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
- #endif
--#define X86_BUG_NULL_SEG      X86_BUG(10) /* Nulling a selector preserves the base */
--#define X86_BUG_SWAPGS_FENCE  X86_BUG(11) /* SWAPGS without input dep on GS */
--#define X86_BUG_MONITOR               X86_BUG(12) /* IPI required to wake up remote CPU */
--#define X86_BUG_AMD_E400      X86_BUG(13) /* CPU is among the affected by Erratum 400 */
-+#define X86_BUG_NULL_SEG              X86_BUG(10) /* Nulling a selector preserves the base */
-+#define X86_BUG_SWAPGS_FENCE          X86_BUG(11) /* SWAPGS without input dep on GS */
-+#define X86_BUG_MONITOR                       X86_BUG(12) /* IPI required to wake up remote CPU */
-+#define X86_BUG_AMD_E400              X86_BUG(13) /* CPU is among the affected by Erratum 400 */
- #endif /* _ASM_X86_CPUFEATURES_H */
--- 
-2.14.2
-
diff --git a/patches/kernel/0112-x86-mm-Define-_PAGE_TABLE-using-_KERNPG_TABLE.patch b/patches/kernel/0112-x86-mm-Define-_PAGE_TABLE-using-_KERNPG_TABLE.patch

new file mode 100644 (file)

index 0000000..f92c75e
--- /dev/null
+++ b/patches/kernel/0112-x86-mm-Define-_PAGE_TABLE-using-_KERNPG_TABLE.patch
@@ -0,0 +1,48 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Fri, 3 Nov 2017 11:20:28 +0100
+Subject: [PATCH] x86/mm: Define _PAGE_TABLE using _KERNPG_TABLE
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+... so that the difference is obvious.
+
+No functionality change.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20171103102028.20284-1-bp@alien8.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit c7da092a1f243bfd1bfb4124f538e69e941882da)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 8c69b0c03cd24576ac69c36ede00afae76bab464)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/pgtable_types.h | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
+index bf9638e1ee42..01f6dc938ccb 100644
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -121,10 +121,9 @@
+ 
+ #define _PAGE_PROTNONE        (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+ 
+-#define _PAGE_TABLE   (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |        \
+-                       _PAGE_ACCESSED | _PAGE_DIRTY)
+ #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |    \
+                        _PAGE_DIRTY)
++#define _PAGE_TABLE   (_KERNPG_TABLE | _PAGE_USER)
+ 
+ /*
+  * Set of bits not changed in pte_modify.  The pte's
+-- 
+2.14.2
+
diff --git a/patches/kernel/0113-x86-cpufeatures-Fix-various-details-in-the-feature-d.patch b/patches/kernel/0113-x86-cpufeatures-Fix-various-details-in-the-feature-d.patch

deleted file mode 100644 (file)

index 0b12f37..0000000
--- a/patches/kernel/0113-x86-cpufeatures-Fix-various-details-in-the-feature-d.patch
+++ /dev/null
@@ -1,369 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Ingo Molnar <mingo@kernel.org>
-Date: Tue, 31 Oct 2017 13:17:23 +0100
-Subject: [PATCH] x86/cpufeatures: Fix various details in the feature
- definitions
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Kept this commit separate from the re-tabulation changes, to make
-the changes easier to review:
-
- - add better explanation for entries with no explanation
- - fix/enhance the text of some of the entries
- - fix the vertical alignment of some of the feature number definitions
- - fix inconsistent capitalization
- - ... and lots of other small details
-
-i.e. make it all more of a coherent unit, instead of a patchwork of years of additions.
-
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Andy Lutomirski <luto@amacapital.net>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/20171031121723.28524-4-mingo@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit f3a624e901c633593156f7b00ca743a6204a29bc)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 256c600cf0edb23ea5f2d70e7da091c909f5ace6)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/cpufeatures.h | 149 ++++++++++++++++++-------------------
- 1 file changed, 74 insertions(+), 75 deletions(-)
-
-diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
-index a021b0756af6..6db782ed9cdb 100644
---- a/arch/x86/include/asm/cpufeatures.h
-+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -19,14 +19,12 @@
-  * Note: If the comment begins with a quoted string, that string is used
-  * in /proc/cpuinfo instead of the macro name.  If the string is "",
-  * this feature bit is not displayed in /proc/cpuinfo at all.
-- */
--
--/*
-+ *
-  * When adding new features here that depend on other features,
-- * please update the table in kernel/cpu/cpuid-deps.c
-+ * please update the table in kernel/cpu/cpuid-deps.c as well.
-  */
- 
--/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-+/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
- #define X86_FEATURE_FPU                       ( 0*32+ 0) /* Onboard FPU */
- #define X86_FEATURE_VME                       ( 0*32+ 1) /* Virtual Mode Extensions */
- #define X86_FEATURE_DE                        ( 0*32+ 2) /* Debugging Extensions */
-@@ -41,8 +39,7 @@
- #define X86_FEATURE_MTRR              ( 0*32+12) /* Memory Type Range Registers */
- #define X86_FEATURE_PGE                       ( 0*32+13) /* Page Global Enable */
- #define X86_FEATURE_MCA                       ( 0*32+14) /* Machine Check Architecture */
--#define X86_FEATURE_CMOV              ( 0*32+15) /* CMOV instructions */
--                                        /* (plus FCMOVcc, FCOMI with FPU) */
-+#define X86_FEATURE_CMOV              ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
- #define X86_FEATURE_PAT                       ( 0*32+16) /* Page Attribute Table */
- #define X86_FEATURE_PSE36             ( 0*32+17) /* 36-bit PSEs */
- #define X86_FEATURE_PN                        ( 0*32+18) /* Processor serial number */
-@@ -62,15 +59,15 @@
- /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
- /* Don't duplicate feature flags which are redundant with Intel! */
- #define X86_FEATURE_SYSCALL           ( 1*32+11) /* SYSCALL/SYSRET */
--#define X86_FEATURE_MP                        ( 1*32+19) /* MP Capable. */
-+#define X86_FEATURE_MP                        ( 1*32+19) /* MP Capable */
- #define X86_FEATURE_NX                        ( 1*32+20) /* Execute Disable */
- #define X86_FEATURE_MMXEXT            ( 1*32+22) /* AMD MMX extensions */
- #define X86_FEATURE_FXSR_OPT          ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
- #define X86_FEATURE_GBPAGES           ( 1*32+26) /* "pdpe1gb" GB pages */
- #define X86_FEATURE_RDTSCP            ( 1*32+27) /* RDTSCP */
--#define X86_FEATURE_LM                        ( 1*32+29) /* Long Mode (x86-64) */
--#define X86_FEATURE_3DNOWEXT          ( 1*32+30) /* AMD 3DNow! extensions */
--#define X86_FEATURE_3DNOW             ( 1*32+31) /* 3DNow! */
-+#define X86_FEATURE_LM                        ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
-+#define X86_FEATURE_3DNOWEXT          ( 1*32+30) /* AMD 3DNow extensions */
-+#define X86_FEATURE_3DNOW             ( 1*32+31) /* 3DNow */
- 
- /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
- #define X86_FEATURE_RECOVERY          ( 2*32+ 0) /* CPU in recovery mode */
-@@ -83,66 +80,67 @@
- #define X86_FEATURE_K6_MTRR           ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
- #define X86_FEATURE_CYRIX_ARR         ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
- #define X86_FEATURE_CENTAUR_MCR               ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
--/* cpu types for specific tunings: */
-+
-+/* CPU types for specific tunings: */
- #define X86_FEATURE_K8                        ( 3*32+ 4) /* "" Opteron, Athlon64 */
- #define X86_FEATURE_K7                        ( 3*32+ 5) /* "" Athlon */
- #define X86_FEATURE_P3                        ( 3*32+ 6) /* "" P3 */
- #define X86_FEATURE_P4                        ( 3*32+ 7) /* "" P4 */
- #define X86_FEATURE_CONSTANT_TSC      ( 3*32+ 8) /* TSC ticks at a constant rate */
--#define X86_FEATURE_UP                        ( 3*32+ 9) /* smp kernel running on up */
--#define X86_FEATURE_ART                       ( 3*32+10) /* Platform has always running timer (ART) */
-+#define X86_FEATURE_UP                        ( 3*32+ 9) /* SMP kernel running on UP */
-+#define X86_FEATURE_ART                       ( 3*32+10) /* Always running timer (ART) */
- #define X86_FEATURE_ARCH_PERFMON      ( 3*32+11) /* Intel Architectural PerfMon */
- #define X86_FEATURE_PEBS              ( 3*32+12) /* Precise-Event Based Sampling */
- #define X86_FEATURE_BTS                       ( 3*32+13) /* Branch Trace Store */
--#define X86_FEATURE_SYSCALL32         ( 3*32+14) /* "" syscall in ia32 userspace */
--#define X86_FEATURE_SYSENTER32                ( 3*32+15) /* "" sysenter in ia32 userspace */
--#define X86_FEATURE_REP_GOOD          ( 3*32+16) /* rep microcode works well */
--#define X86_FEATURE_MFENCE_RDTSC      ( 3*32+17) /* "" Mfence synchronizes RDTSC */
--#define X86_FEATURE_LFENCE_RDTSC      ( 3*32+18) /* "" Lfence synchronizes RDTSC */
-+#define X86_FEATURE_SYSCALL32         ( 3*32+14) /* "" syscall in IA32 userspace */
-+#define X86_FEATURE_SYSENTER32                ( 3*32+15) /* "" sysenter in IA32 userspace */
-+#define X86_FEATURE_REP_GOOD          ( 3*32+16) /* REP microcode works well */
-+#define X86_FEATURE_MFENCE_RDTSC      ( 3*32+17) /* "" MFENCE synchronizes RDTSC */
-+#define X86_FEATURE_LFENCE_RDTSC      ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
- #define X86_FEATURE_ACC_POWER         ( 3*32+19) /* AMD Accumulated Power Mechanism */
- #define X86_FEATURE_NOPL              ( 3*32+20) /* The NOPL (0F 1F) instructions */
- #define X86_FEATURE_ALWAYS            ( 3*32+21) /* "" Always-present feature */
--#define X86_FEATURE_XTOPOLOGY         ( 3*32+22) /* cpu topology enum extensions */
-+#define X86_FEATURE_XTOPOLOGY         ( 3*32+22) /* CPU topology enum extensions */
- #define X86_FEATURE_TSC_RELIABLE      ( 3*32+23) /* TSC is known to be reliable */
- #define X86_FEATURE_NONSTOP_TSC               ( 3*32+24) /* TSC does not stop in C states */
- #define X86_FEATURE_CPUID             ( 3*32+25) /* CPU has CPUID instruction itself */
--#define X86_FEATURE_EXTD_APICID               ( 3*32+26) /* has extended APICID (8 bits) */
--#define X86_FEATURE_AMD_DCM           ( 3*32+27) /* multi-node processor */
--#define X86_FEATURE_APERFMPERF                ( 3*32+28) /* APERFMPERF */
-+#define X86_FEATURE_EXTD_APICID               ( 3*32+26) /* Extended APICID (8 bits) */
-+#define X86_FEATURE_AMD_DCM           ( 3*32+27) /* AMD multi-node processor */
-+#define X86_FEATURE_APERFMPERF                ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
- #define X86_FEATURE_NONSTOP_TSC_S3    ( 3*32+30) /* TSC doesn't stop in S3 state */
- #define X86_FEATURE_TSC_KNOWN_FREQ    ( 3*32+31) /* TSC has known frequency */
- 
--/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-+/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
- #define X86_FEATURE_XMM3              ( 4*32+ 0) /* "pni" SSE-3 */
- #define X86_FEATURE_PCLMULQDQ         ( 4*32+ 1) /* PCLMULQDQ instruction */
- #define X86_FEATURE_DTES64            ( 4*32+ 2) /* 64-bit Debug Store */
--#define X86_FEATURE_MWAIT             ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
--#define X86_FEATURE_DSCPL             ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
-+#define X86_FEATURE_MWAIT             ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
-+#define X86_FEATURE_DSCPL             ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
- #define X86_FEATURE_VMX                       ( 4*32+ 5) /* Hardware virtualization */
--#define X86_FEATURE_SMX                       ( 4*32+ 6) /* Safer mode */
-+#define X86_FEATURE_SMX                       ( 4*32+ 6) /* Safer Mode eXtensions */
- #define X86_FEATURE_EST                       ( 4*32+ 7) /* Enhanced SpeedStep */
- #define X86_FEATURE_TM2                       ( 4*32+ 8) /* Thermal Monitor 2 */
- #define X86_FEATURE_SSSE3             ( 4*32+ 9) /* Supplemental SSE-3 */
- #define X86_FEATURE_CID                       ( 4*32+10) /* Context ID */
- #define X86_FEATURE_SDBG              ( 4*32+11) /* Silicon Debug */
- #define X86_FEATURE_FMA                       ( 4*32+12) /* Fused multiply-add */
--#define X86_FEATURE_CX16              ( 4*32+13) /* CMPXCHG16B */
-+#define X86_FEATURE_CX16              ( 4*32+13) /* CMPXCHG16B instruction */
- #define X86_FEATURE_XTPR              ( 4*32+14) /* Send Task Priority Messages */
--#define X86_FEATURE_PDCM              ( 4*32+15) /* Performance Capabilities */
-+#define X86_FEATURE_PDCM              ( 4*32+15) /* Perf/Debug Capabilities MSR */
- #define X86_FEATURE_PCID              ( 4*32+17) /* Process Context Identifiers */
- #define X86_FEATURE_DCA                       ( 4*32+18) /* Direct Cache Access */
- #define X86_FEATURE_XMM4_1            ( 4*32+19) /* "sse4_1" SSE-4.1 */
- #define X86_FEATURE_XMM4_2            ( 4*32+20) /* "sse4_2" SSE-4.2 */
--#define X86_FEATURE_X2APIC            ( 4*32+21) /* x2APIC */
-+#define X86_FEATURE_X2APIC            ( 4*32+21) /* X2APIC */
- #define X86_FEATURE_MOVBE             ( 4*32+22) /* MOVBE instruction */
- #define X86_FEATURE_POPCNT            ( 4*32+23) /* POPCNT instruction */
--#define X86_FEATURE_TSC_DEADLINE_TIMER        ( 4*32+24) /* Tsc deadline timer */
-+#define X86_FEATURE_TSC_DEADLINE_TIMER        ( 4*32+24) /* TSC deadline timer */
- #define X86_FEATURE_AES                       ( 4*32+25) /* AES instructions */
--#define X86_FEATURE_XSAVE             ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
--#define X86_FEATURE_OSXSAVE           ( 4*32+27) /* "" XSAVE enabled in the OS */
-+#define X86_FEATURE_XSAVE             ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
-+#define X86_FEATURE_OSXSAVE           ( 4*32+27) /* "" XSAVE instruction enabled in the OS */
- #define X86_FEATURE_AVX                       ( 4*32+28) /* Advanced Vector Extensions */
--#define X86_FEATURE_F16C              ( 4*32+29) /* 16-bit fp conversions */
--#define X86_FEATURE_RDRAND            ( 4*32+30) /* The RDRAND instruction */
-+#define X86_FEATURE_F16C              ( 4*32+29) /* 16-bit FP conversions */
-+#define X86_FEATURE_RDRAND            ( 4*32+30) /* RDRAND instruction */
- #define X86_FEATURE_HYPERVISOR                ( 4*32+31) /* Running on a hypervisor */
- 
- /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-@@ -157,10 +155,10 @@
- #define X86_FEATURE_PMM                       ( 5*32+12) /* PadLock Montgomery Multiplier */
- #define X86_FEATURE_PMM_EN            ( 5*32+13) /* PMM enabled */
- 
--/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-+/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
- #define X86_FEATURE_LAHF_LM           ( 6*32+ 0) /* LAHF/SAHF in long mode */
- #define X86_FEATURE_CMP_LEGACY                ( 6*32+ 1) /* If yes HyperThreading not valid */
--#define X86_FEATURE_SVM                       ( 6*32+ 2) /* Secure virtual machine */
-+#define X86_FEATURE_SVM                       ( 6*32+ 2) /* Secure Virtual Machine */
- #define X86_FEATURE_EXTAPIC           ( 6*32+ 3) /* Extended APIC space */
- #define X86_FEATURE_CR8_LEGACY                ( 6*32+ 4) /* CR8 in 32-bit mode */
- #define X86_FEATURE_ABM                       ( 6*32+ 5) /* Advanced bit manipulation */
-@@ -174,16 +172,16 @@
- #define X86_FEATURE_WDT                       ( 6*32+13) /* Watchdog timer */
- #define X86_FEATURE_LWP                       ( 6*32+15) /* Light Weight Profiling */
- #define X86_FEATURE_FMA4              ( 6*32+16) /* 4 operands MAC instructions */
--#define X86_FEATURE_TCE                       ( 6*32+17) /* translation cache extension */
-+#define X86_FEATURE_TCE                       ( 6*32+17) /* Translation Cache Extension */
- #define X86_FEATURE_NODEID_MSR                ( 6*32+19) /* NodeId MSR */
--#define X86_FEATURE_TBM                       ( 6*32+21) /* trailing bit manipulations */
--#define X86_FEATURE_TOPOEXT           ( 6*32+22) /* topology extensions CPUID leafs */
--#define X86_FEATURE_PERFCTR_CORE      ( 6*32+23) /* core performance counter extensions */
-+#define X86_FEATURE_TBM                       ( 6*32+21) /* Trailing Bit Manipulations */
-+#define X86_FEATURE_TOPOEXT           ( 6*32+22) /* Topology extensions CPUID leafs */
-+#define X86_FEATURE_PERFCTR_CORE      ( 6*32+23) /* Core performance counter extensions */
- #define X86_FEATURE_PERFCTR_NB                ( 6*32+24) /* NB performance counter extensions */
--#define X86_FEATURE_BPEXT             (6*32+26) /* data breakpoint extension */
--#define X86_FEATURE_PTSC              ( 6*32+27) /* performance time-stamp counter */
-+#define X86_FEATURE_BPEXT             ( 6*32+26) /* Data breakpoint extension */
-+#define X86_FEATURE_PTSC              ( 6*32+27) /* Performance time-stamp counter */
- #define X86_FEATURE_PERFCTR_L2                ( 6*32+28) /* Last Level Cache performance counter extensions */
--#define X86_FEATURE_MWAITX            ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
-+#define X86_FEATURE_MWAITX            ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
- 
- /*
-  * Auxiliary flags: Linux defined - For features scattered in various
-@@ -191,7 +189,7 @@
-  *
-  * Reuse free bits when adding new feature flags!
-  */
--#define X86_FEATURE_RING3MWAIT                ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
-+#define X86_FEATURE_RING3MWAIT                ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
- #define X86_FEATURE_CPUID_FAULT               ( 7*32+ 1) /* Intel CPUID faulting */
- #define X86_FEATURE_CPB                       ( 7*32+ 2) /* AMD Core Performance Boost */
- #define X86_FEATURE_EPB                       ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-@@ -205,8 +203,8 @@
- 
- #define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
- #define X86_FEATURE_INTEL_PT          ( 7*32+15) /* Intel Processor Trace */
--#define X86_FEATURE_AVX512_4VNNIW     (7*32+16) /* AVX-512 Neural Network Instructions */
--#define X86_FEATURE_AVX512_4FMAPS     (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
-+#define X86_FEATURE_AVX512_4VNNIW     ( 7*32+16) /* AVX-512 Neural Network Instructions */
-+#define X86_FEATURE_AVX512_4FMAPS     ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
- 
- #define X86_FEATURE_MBA                       ( 7*32+18) /* Memory Bandwidth Allocation */
- 
-@@ -217,19 +215,19 @@
- #define X86_FEATURE_EPT                       ( 8*32+ 3) /* Intel Extended Page Table */
- #define X86_FEATURE_VPID              ( 8*32+ 4) /* Intel Virtual Processor ID */
- 
--#define X86_FEATURE_VMMCALL           ( 8*32+15) /* Prefer vmmcall to vmcall */
-+#define X86_FEATURE_VMMCALL           ( 8*32+15) /* Prefer VMMCALL to VMCALL */
- #define X86_FEATURE_XENPV             ( 8*32+16) /* "" Xen paravirtual guest */
- 
- 
--/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
--#define X86_FEATURE_FSGSBASE          ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
--#define X86_FEATURE_TSC_ADJUST                ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
-+/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
-+#define X86_FEATURE_FSGSBASE          ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
-+#define X86_FEATURE_TSC_ADJUST                ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
- #define X86_FEATURE_BMI1              ( 9*32+ 3) /* 1st group bit manipulation extensions */
- #define X86_FEATURE_HLE                       ( 9*32+ 4) /* Hardware Lock Elision */
- #define X86_FEATURE_AVX2              ( 9*32+ 5) /* AVX2 instructions */
- #define X86_FEATURE_SMEP              ( 9*32+ 7) /* Supervisor Mode Execution Protection */
- #define X86_FEATURE_BMI2              ( 9*32+ 8) /* 2nd group bit manipulation extensions */
--#define X86_FEATURE_ERMS              ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
-+#define X86_FEATURE_ERMS              ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
- #define X86_FEATURE_INVPCID           ( 9*32+10) /* Invalidate Processor Context ID */
- #define X86_FEATURE_RTM                       ( 9*32+11) /* Restricted Transactional Memory */
- #define X86_FEATURE_CQM                       ( 9*32+12) /* Cache QoS Monitoring */
-@@ -237,8 +235,8 @@
- #define X86_FEATURE_RDT_A             ( 9*32+15) /* Resource Director Technology Allocation */
- #define X86_FEATURE_AVX512F           ( 9*32+16) /* AVX-512 Foundation */
- #define X86_FEATURE_AVX512DQ          ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
--#define X86_FEATURE_RDSEED            ( 9*32+18) /* The RDSEED instruction */
--#define X86_FEATURE_ADX                       ( 9*32+19) /* The ADCX and ADOX instructions */
-+#define X86_FEATURE_RDSEED            ( 9*32+18) /* RDSEED instruction */
-+#define X86_FEATURE_ADX                       ( 9*32+19) /* ADCX and ADOX instructions */
- #define X86_FEATURE_SMAP              ( 9*32+20) /* Supervisor Mode Access Prevention */
- #define X86_FEATURE_AVX512IFMA                ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
- #define X86_FEATURE_CLFLUSHOPT                ( 9*32+23) /* CLFLUSHOPT instruction */
-@@ -250,25 +248,25 @@
- #define X86_FEATURE_AVX512BW          ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
- #define X86_FEATURE_AVX512VL          ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
- 
--/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
--#define X86_FEATURE_XSAVEOPT          (10*32+ 0) /* XSAVEOPT */
--#define X86_FEATURE_XSAVEC            (10*32+ 1) /* XSAVEC */
--#define X86_FEATURE_XGETBV1           (10*32+ 2) /* XGETBV with ECX = 1 */
--#define X86_FEATURE_XSAVES            (10*32+ 3) /* XSAVES/XRSTORS */
-+/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
-+#define X86_FEATURE_XSAVEOPT          (10*32+ 0) /* XSAVEOPT instruction */
-+#define X86_FEATURE_XSAVEC            (10*32+ 1) /* XSAVEC instruction */
-+#define X86_FEATURE_XGETBV1           (10*32+ 2) /* XGETBV with ECX = 1 instruction */
-+#define X86_FEATURE_XSAVES            (10*32+ 3) /* XSAVES/XRSTORS instructions */
- 
--/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
-+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
- #define X86_FEATURE_CQM_LLC           (11*32+ 1) /* LLC QoS if 1 */
- 
--/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
--#define X86_FEATURE_CQM_OCCUP_LLC     (12*32+ 0) /* LLC occupancy monitoring if 1 */
-+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
-+#define X86_FEATURE_CQM_OCCUP_LLC     (12*32+ 0) /* LLC occupancy monitoring */
- #define X86_FEATURE_CQM_MBM_TOTAL     (12*32+ 1) /* LLC Total MBM monitoring */
- #define X86_FEATURE_CQM_MBM_LOCAL     (12*32+ 2) /* LLC Local MBM monitoring */
- 
--/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
--#define X86_FEATURE_CLZERO            (13*32+0) /* CLZERO instruction */
--#define X86_FEATURE_IRPERF            (13*32+1) /* Instructions Retired Count */
-+/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
-+#define X86_FEATURE_CLZERO            (13*32+ 0) /* CLZERO instruction */
-+#define X86_FEATURE_IRPERF            (13*32+ 1) /* Instructions Retired Count */
- 
--/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
-+/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
- #define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
- #define X86_FEATURE_IDA                       (14*32+ 1) /* Intel Dynamic Acceleration */
- #define X86_FEATURE_ARAT              (14*32+ 2) /* Always Running APIC Timer */
-@@ -280,7 +278,7 @@
- #define X86_FEATURE_HWP_EPP           (14*32+10) /* HWP Energy Perf. Preference */
- #define X86_FEATURE_HWP_PKG_REQ               (14*32+11) /* HWP Package Level Request */
- 
--/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
-+/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
- #define X86_FEATURE_NPT                       (15*32+ 0) /* Nested Page Table support */
- #define X86_FEATURE_LBRV              (15*32+ 1) /* LBR Virtualization support */
- #define X86_FEATURE_SVML              (15*32+ 2) /* "svm_lock" SVM locking MSR */
-@@ -295,24 +293,24 @@
- #define X86_FEATURE_V_VMSAVE_VMLOAD   (15*32+15) /* Virtual VMSAVE VMLOAD */
- #define X86_FEATURE_VGIF              (15*32+16) /* Virtual GIF */
- 
--/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
-+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
- #define X86_FEATURE_AVX512VBMI                (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
- #define X86_FEATURE_PKU                       (16*32+ 3) /* Protection Keys for Userspace */
- #define X86_FEATURE_OSPKE             (16*32+ 4) /* OS Protection Keys Enable */
- #define X86_FEATURE_AVX512_VBMI2      (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
- #define X86_FEATURE_GFNI              (16*32+ 8) /* Galois Field New Instructions */
- #define X86_FEATURE_VAES              (16*32+ 9) /* Vector AES */
--#define X86_FEATURE_VPCLMULQDQ                (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
--#define X86_FEATURE_AVX512_VNNI               (16*32+ 11) /* Vector Neural Network Instructions */
--#define X86_FEATURE_AVX512_BITALG     (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
-+#define X86_FEATURE_VPCLMULQDQ                (16*32+10) /* Carry-Less Multiplication Double Quadword */
-+#define X86_FEATURE_AVX512_VNNI               (16*32+11) /* Vector Neural Network Instructions */
-+#define X86_FEATURE_AVX512_BITALG     (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
- #define X86_FEATURE_AVX512_VPOPCNTDQ  (16*32+14) /* POPCNT for vectors of DW/QW */
- #define X86_FEATURE_LA57              (16*32+16) /* 5-level page tables */
- #define X86_FEATURE_RDPID             (16*32+22) /* RDPID instruction */
- 
--/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
--#define X86_FEATURE_OVERFLOW_RECOV    (17*32+0) /* MCA overflow recovery support */
--#define X86_FEATURE_SUCCOR            (17*32+1) /* Uncorrectable error containment and recovery */
--#define X86_FEATURE_SMCA              (17*32+3) /* Scalable MCA */
-+/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
-+#define X86_FEATURE_OVERFLOW_RECOV    (17*32+ 0) /* MCA overflow recovery support */
-+#define X86_FEATURE_SUCCOR            (17*32+ 1) /* Uncorrectable error containment and recovery */
-+#define X86_FEATURE_SMCA              (17*32+ 3) /* Scalable MCA */
- 
- /*
-  * BUG word(s)
-@@ -339,4 +337,5 @@
- #define X86_BUG_SWAPGS_FENCE          X86_BUG(11) /* SWAPGS without input dep on GS */
- #define X86_BUG_MONITOR                       X86_BUG(12) /* IPI required to wake up remote CPU */
- #define X86_BUG_AMD_E400              X86_BUG(13) /* CPU is among the affected by Erratum 400 */
-+
- #endif /* _ASM_X86_CPUFEATURES_H */
--- 
-2.14.2
-
diff --git a/patches/kernel/0113-x86-cpufeatures-Re-tabulate-the-X86_FEATURE-definiti.patch b/patches/kernel/0113-x86-cpufeatures-Re-tabulate-the-X86_FEATURE-definiti.patch

new file mode 100644 (file)

index 0000000..3883aa5
--- /dev/null
+++ b/patches/kernel/0113-x86-cpufeatures-Re-tabulate-the-X86_FEATURE-definiti.patch
@@ -0,0 +1,623 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Ingo Molnar <mingo@kernel.org>
+Date: Tue, 31 Oct 2017 13:17:22 +0100
+Subject: [PATCH] x86/cpufeatures: Re-tabulate the X86_FEATURE definitions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Over the years asm/cpufeatures.h has become somewhat of a mess: the original
+tabulation style was too narrow, while x86 feature names also kept growing
+in length, creating frequent field width overflows.
+
+Re-tabulate it to make it wider and easier to read/modify. Also harmonize
+the tabulation of the other defines in this file to match it.
+
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20171031121723.28524-3-mingo@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit acbc845ffefd9fb70466182cd8555a26189462b2)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit df7c6e7b62274889a028357a579acfb2215c3f98)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/cpufeatures.h | 506 +++++++++++++++++++------------------
+ 1 file changed, 254 insertions(+), 252 deletions(-)
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index c465bd6613ed..a021b0756af6 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -12,8 +12,8 @@
+ /*
+  * Defines x86 CPU feature bits
+  */
+-#define NCAPINTS      18      /* N 32-bit words worth of info */
+-#define NBUGINTS      1       /* N 32-bit bug flags */
++#define NCAPINTS                      18         /* N 32-bit words worth of info */
++#define NBUGINTS                      1          /* N 32-bit bug flags */
+ 
+ /*
+  * Note: If the comment begins with a quoted string, that string is used
+@@ -27,163 +27,163 @@
+  */
+ 
+ /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+-#define X86_FEATURE_FPU               ( 0*32+ 0) /* Onboard FPU */
+-#define X86_FEATURE_VME               ( 0*32+ 1) /* Virtual Mode Extensions */
+-#define X86_FEATURE_DE                ( 0*32+ 2) /* Debugging Extensions */
+-#define X86_FEATURE_PSE               ( 0*32+ 3) /* Page Size Extensions */
+-#define X86_FEATURE_TSC               ( 0*32+ 4) /* Time Stamp Counter */
+-#define X86_FEATURE_MSR               ( 0*32+ 5) /* Model-Specific Registers */
+-#define X86_FEATURE_PAE               ( 0*32+ 6) /* Physical Address Extensions */
+-#define X86_FEATURE_MCE               ( 0*32+ 7) /* Machine Check Exception */
+-#define X86_FEATURE_CX8               ( 0*32+ 8) /* CMPXCHG8 instruction */
+-#define X86_FEATURE_APIC      ( 0*32+ 9) /* Onboard APIC */
+-#define X86_FEATURE_SEP               ( 0*32+11) /* SYSENTER/SYSEXIT */
+-#define X86_FEATURE_MTRR      ( 0*32+12) /* Memory Type Range Registers */
+-#define X86_FEATURE_PGE               ( 0*32+13) /* Page Global Enable */
+-#define X86_FEATURE_MCA               ( 0*32+14) /* Machine Check Architecture */
+-#define X86_FEATURE_CMOV      ( 0*32+15) /* CMOV instructions */
++#define X86_FEATURE_FPU                       ( 0*32+ 0) /* Onboard FPU */
++#define X86_FEATURE_VME                       ( 0*32+ 1) /* Virtual Mode Extensions */
++#define X86_FEATURE_DE                        ( 0*32+ 2) /* Debugging Extensions */
++#define X86_FEATURE_PSE                       ( 0*32+ 3) /* Page Size Extensions */
++#define X86_FEATURE_TSC                       ( 0*32+ 4) /* Time Stamp Counter */
++#define X86_FEATURE_MSR                       ( 0*32+ 5) /* Model-Specific Registers */
++#define X86_FEATURE_PAE                       ( 0*32+ 6) /* Physical Address Extensions */
++#define X86_FEATURE_MCE                       ( 0*32+ 7) /* Machine Check Exception */
++#define X86_FEATURE_CX8                       ( 0*32+ 8) /* CMPXCHG8 instruction */
++#define X86_FEATURE_APIC              ( 0*32+ 9) /* Onboard APIC */
++#define X86_FEATURE_SEP                       ( 0*32+11) /* SYSENTER/SYSEXIT */
++#define X86_FEATURE_MTRR              ( 0*32+12) /* Memory Type Range Registers */
++#define X86_FEATURE_PGE                       ( 0*32+13) /* Page Global Enable */
++#define X86_FEATURE_MCA                       ( 0*32+14) /* Machine Check Architecture */
++#define X86_FEATURE_CMOV              ( 0*32+15) /* CMOV instructions */
+                                         /* (plus FCMOVcc, FCOMI with FPU) */
+-#define X86_FEATURE_PAT               ( 0*32+16) /* Page Attribute Table */
+-#define X86_FEATURE_PSE36     ( 0*32+17) /* 36-bit PSEs */
+-#define X86_FEATURE_PN                ( 0*32+18) /* Processor serial number */
+-#define X86_FEATURE_CLFLUSH   ( 0*32+19) /* CLFLUSH instruction */
+-#define X86_FEATURE_DS                ( 0*32+21) /* "dts" Debug Store */
+-#define X86_FEATURE_ACPI      ( 0*32+22) /* ACPI via MSR */
+-#define X86_FEATURE_MMX               ( 0*32+23) /* Multimedia Extensions */
+-#define X86_FEATURE_FXSR      ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+-#define X86_FEATURE_XMM               ( 0*32+25) /* "sse" */
+-#define X86_FEATURE_XMM2      ( 0*32+26) /* "sse2" */
+-#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
+-#define X86_FEATURE_HT                ( 0*32+28) /* Hyper-Threading */
+-#define X86_FEATURE_ACC               ( 0*32+29) /* "tm" Automatic clock control */
+-#define X86_FEATURE_IA64      ( 0*32+30) /* IA-64 processor */
+-#define X86_FEATURE_PBE               ( 0*32+31) /* Pending Break Enable */
++#define X86_FEATURE_PAT                       ( 0*32+16) /* Page Attribute Table */
++#define X86_FEATURE_PSE36             ( 0*32+17) /* 36-bit PSEs */
++#define X86_FEATURE_PN                        ( 0*32+18) /* Processor serial number */
++#define X86_FEATURE_CLFLUSH           ( 0*32+19) /* CLFLUSH instruction */
++#define X86_FEATURE_DS                        ( 0*32+21) /* "dts" Debug Store */
++#define X86_FEATURE_ACPI              ( 0*32+22) /* ACPI via MSR */
++#define X86_FEATURE_MMX                       ( 0*32+23) /* Multimedia Extensions */
++#define X86_FEATURE_FXSR              ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
++#define X86_FEATURE_XMM                       ( 0*32+25) /* "sse" */
++#define X86_FEATURE_XMM2              ( 0*32+26) /* "sse2" */
++#define X86_FEATURE_SELFSNOOP         ( 0*32+27) /* "ss" CPU self snoop */
++#define X86_FEATURE_HT                        ( 0*32+28) /* Hyper-Threading */
++#define X86_FEATURE_ACC                       ( 0*32+29) /* "tm" Automatic clock control */
++#define X86_FEATURE_IA64              ( 0*32+30) /* IA-64 processor */
++#define X86_FEATURE_PBE                       ( 0*32+31) /* Pending Break Enable */
+ 
+ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+ /* Don't duplicate feature flags which are redundant with Intel! */
+-#define X86_FEATURE_SYSCALL   ( 1*32+11) /* SYSCALL/SYSRET */
+-#define X86_FEATURE_MP                ( 1*32+19) /* MP Capable. */
+-#define X86_FEATURE_NX                ( 1*32+20) /* Execute Disable */
+-#define X86_FEATURE_MMXEXT    ( 1*32+22) /* AMD MMX extensions */
+-#define X86_FEATURE_FXSR_OPT  ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+-#define X86_FEATURE_GBPAGES   ( 1*32+26) /* "pdpe1gb" GB pages */
+-#define X86_FEATURE_RDTSCP    ( 1*32+27) /* RDTSCP */
+-#define X86_FEATURE_LM                ( 1*32+29) /* Long Mode (x86-64) */
+-#define X86_FEATURE_3DNOWEXT  ( 1*32+30) /* AMD 3DNow! extensions */
+-#define X86_FEATURE_3DNOW     ( 1*32+31) /* 3DNow! */
++#define X86_FEATURE_SYSCALL           ( 1*32+11) /* SYSCALL/SYSRET */
++#define X86_FEATURE_MP                        ( 1*32+19) /* MP Capable. */
++#define X86_FEATURE_NX                        ( 1*32+20) /* Execute Disable */
++#define X86_FEATURE_MMXEXT            ( 1*32+22) /* AMD MMX extensions */
++#define X86_FEATURE_FXSR_OPT          ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
++#define X86_FEATURE_GBPAGES           ( 1*32+26) /* "pdpe1gb" GB pages */
++#define X86_FEATURE_RDTSCP            ( 1*32+27) /* RDTSCP */
++#define X86_FEATURE_LM                        ( 1*32+29) /* Long Mode (x86-64) */
++#define X86_FEATURE_3DNOWEXT          ( 1*32+30) /* AMD 3DNow! extensions */
++#define X86_FEATURE_3DNOW             ( 1*32+31) /* 3DNow! */
+ 
+ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+-#define X86_FEATURE_RECOVERY  ( 2*32+ 0) /* CPU in recovery mode */
+-#define X86_FEATURE_LONGRUN   ( 2*32+ 1) /* Longrun power control */
+-#define X86_FEATURE_LRTI      ( 2*32+ 3) /* LongRun table interface */
++#define X86_FEATURE_RECOVERY          ( 2*32+ 0) /* CPU in recovery mode */
++#define X86_FEATURE_LONGRUN           ( 2*32+ 1) /* Longrun power control */
++#define X86_FEATURE_LRTI              ( 2*32+ 3) /* LongRun table interface */
+ 
+ /* Other features, Linux-defined mapping, word 3 */
+ /* This range is used for feature bits which conflict or are synthesized */
+-#define X86_FEATURE_CXMMX     ( 3*32+ 0) /* Cyrix MMX extensions */
+-#define X86_FEATURE_K6_MTRR   ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+-#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+-#define X86_FEATURE_CENTAUR_MCR       ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
++#define X86_FEATURE_CXMMX             ( 3*32+ 0) /* Cyrix MMX extensions */
++#define X86_FEATURE_K6_MTRR           ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
++#define X86_FEATURE_CYRIX_ARR         ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
++#define X86_FEATURE_CENTAUR_MCR               ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
+ /* cpu types for specific tunings: */
+-#define X86_FEATURE_K8                ( 3*32+ 4) /* "" Opteron, Athlon64 */
+-#define X86_FEATURE_K7                ( 3*32+ 5) /* "" Athlon */
+-#define X86_FEATURE_P3                ( 3*32+ 6) /* "" P3 */
+-#define X86_FEATURE_P4                ( 3*32+ 7) /* "" P4 */
+-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
+-#define X86_FEATURE_UP                ( 3*32+ 9) /* smp kernel running on up */
+-#define X86_FEATURE_ART               ( 3*32+10) /* Platform has always running timer (ART) */
+-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
+-#define X86_FEATURE_PEBS      ( 3*32+12) /* Precise-Event Based Sampling */
+-#define X86_FEATURE_BTS               ( 3*32+13) /* Branch Trace Store */
+-#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
+-#define X86_FEATURE_SYSENTER32        ( 3*32+15) /* "" sysenter in ia32 userspace */
+-#define X86_FEATURE_REP_GOOD  ( 3*32+16) /* rep microcode works well */
+-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
+-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
+-#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
+-#define X86_FEATURE_NOPL      ( 3*32+20) /* The NOPL (0F 1F) instructions */
+-#define X86_FEATURE_ALWAYS    ( 3*32+21) /* "" Always-present feature */
+-#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
+-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
+-#define X86_FEATURE_NONSTOP_TSC       ( 3*32+24) /* TSC does not stop in C states */
+-#define X86_FEATURE_CPUID     ( 3*32+25) /* CPU has CPUID instruction itself */
+-#define X86_FEATURE_EXTD_APICID       ( 3*32+26) /* has extended APICID (8 bits) */
+-#define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
+-#define X86_FEATURE_APERFMPERF        ( 3*32+28) /* APERFMPERF */
+-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+-#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
++#define X86_FEATURE_K8                        ( 3*32+ 4) /* "" Opteron, Athlon64 */
++#define X86_FEATURE_K7                        ( 3*32+ 5) /* "" Athlon */
++#define X86_FEATURE_P3                        ( 3*32+ 6) /* "" P3 */
++#define X86_FEATURE_P4                        ( 3*32+ 7) /* "" P4 */
++#define X86_FEATURE_CONSTANT_TSC      ( 3*32+ 8) /* TSC ticks at a constant rate */
++#define X86_FEATURE_UP                        ( 3*32+ 9) /* smp kernel running on up */
++#define X86_FEATURE_ART                       ( 3*32+10) /* Platform has always running timer (ART) */
++#define X86_FEATURE_ARCH_PERFMON      ( 3*32+11) /* Intel Architectural PerfMon */
++#define X86_FEATURE_PEBS              ( 3*32+12) /* Precise-Event Based Sampling */
++#define X86_FEATURE_BTS                       ( 3*32+13) /* Branch Trace Store */
++#define X86_FEATURE_SYSCALL32         ( 3*32+14) /* "" syscall in ia32 userspace */
++#define X86_FEATURE_SYSENTER32                ( 3*32+15) /* "" sysenter in ia32 userspace */
++#define X86_FEATURE_REP_GOOD          ( 3*32+16) /* rep microcode works well */
++#define X86_FEATURE_MFENCE_RDTSC      ( 3*32+17) /* "" Mfence synchronizes RDTSC */
++#define X86_FEATURE_LFENCE_RDTSC      ( 3*32+18) /* "" Lfence synchronizes RDTSC */
++#define X86_FEATURE_ACC_POWER         ( 3*32+19) /* AMD Accumulated Power Mechanism */
++#define X86_FEATURE_NOPL              ( 3*32+20) /* The NOPL (0F 1F) instructions */
++#define X86_FEATURE_ALWAYS            ( 3*32+21) /* "" Always-present feature */
++#define X86_FEATURE_XTOPOLOGY         ( 3*32+22) /* cpu topology enum extensions */
++#define X86_FEATURE_TSC_RELIABLE      ( 3*32+23) /* TSC is known to be reliable */
++#define X86_FEATURE_NONSTOP_TSC               ( 3*32+24) /* TSC does not stop in C states */
++#define X86_FEATURE_CPUID             ( 3*32+25) /* CPU has CPUID instruction itself */
++#define X86_FEATURE_EXTD_APICID               ( 3*32+26) /* has extended APICID (8 bits) */
++#define X86_FEATURE_AMD_DCM           ( 3*32+27) /* multi-node processor */
++#define X86_FEATURE_APERFMPERF                ( 3*32+28) /* APERFMPERF */
++#define X86_FEATURE_NONSTOP_TSC_S3    ( 3*32+30) /* TSC doesn't stop in S3 state */
++#define X86_FEATURE_TSC_KNOWN_FREQ    ( 3*32+31) /* TSC has known frequency */
+ 
+ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+-#define X86_FEATURE_XMM3      ( 4*32+ 0) /* "pni" SSE-3 */
+-#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
+-#define X86_FEATURE_DTES64    ( 4*32+ 2) /* 64-bit Debug Store */
+-#define X86_FEATURE_MWAIT     ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
+-#define X86_FEATURE_DSCPL     ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
+-#define X86_FEATURE_VMX               ( 4*32+ 5) /* Hardware virtualization */
+-#define X86_FEATURE_SMX               ( 4*32+ 6) /* Safer mode */
+-#define X86_FEATURE_EST               ( 4*32+ 7) /* Enhanced SpeedStep */
+-#define X86_FEATURE_TM2               ( 4*32+ 8) /* Thermal Monitor 2 */
+-#define X86_FEATURE_SSSE3     ( 4*32+ 9) /* Supplemental SSE-3 */
+-#define X86_FEATURE_CID               ( 4*32+10) /* Context ID */
+-#define X86_FEATURE_SDBG      ( 4*32+11) /* Silicon Debug */
+-#define X86_FEATURE_FMA               ( 4*32+12) /* Fused multiply-add */
+-#define X86_FEATURE_CX16      ( 4*32+13) /* CMPXCHG16B */
+-#define X86_FEATURE_XTPR      ( 4*32+14) /* Send Task Priority Messages */
+-#define X86_FEATURE_PDCM      ( 4*32+15) /* Performance Capabilities */
+-#define X86_FEATURE_PCID      ( 4*32+17) /* Process Context Identifiers */
+-#define X86_FEATURE_DCA               ( 4*32+18) /* Direct Cache Access */
+-#define X86_FEATURE_XMM4_1    ( 4*32+19) /* "sse4_1" SSE-4.1 */
+-#define X86_FEATURE_XMM4_2    ( 4*32+20) /* "sse4_2" SSE-4.2 */
+-#define X86_FEATURE_X2APIC    ( 4*32+21) /* x2APIC */
+-#define X86_FEATURE_MOVBE     ( 4*32+22) /* MOVBE instruction */
+-#define X86_FEATURE_POPCNT      ( 4*32+23) /* POPCNT instruction */
++#define X86_FEATURE_XMM3              ( 4*32+ 0) /* "pni" SSE-3 */
++#define X86_FEATURE_PCLMULQDQ         ( 4*32+ 1) /* PCLMULQDQ instruction */
++#define X86_FEATURE_DTES64            ( 4*32+ 2) /* 64-bit Debug Store */
++#define X86_FEATURE_MWAIT             ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
++#define X86_FEATURE_DSCPL             ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
++#define X86_FEATURE_VMX                       ( 4*32+ 5) /* Hardware virtualization */
++#define X86_FEATURE_SMX                       ( 4*32+ 6) /* Safer mode */
++#define X86_FEATURE_EST                       ( 4*32+ 7) /* Enhanced SpeedStep */
++#define X86_FEATURE_TM2                       ( 4*32+ 8) /* Thermal Monitor 2 */
++#define X86_FEATURE_SSSE3             ( 4*32+ 9) /* Supplemental SSE-3 */
++#define X86_FEATURE_CID                       ( 4*32+10) /* Context ID */
++#define X86_FEATURE_SDBG              ( 4*32+11) /* Silicon Debug */
++#define X86_FEATURE_FMA                       ( 4*32+12) /* Fused multiply-add */
++#define X86_FEATURE_CX16              ( 4*32+13) /* CMPXCHG16B */
++#define X86_FEATURE_XTPR              ( 4*32+14) /* Send Task Priority Messages */
++#define X86_FEATURE_PDCM              ( 4*32+15) /* Performance Capabilities */
++#define X86_FEATURE_PCID              ( 4*32+17) /* Process Context Identifiers */
++#define X86_FEATURE_DCA                       ( 4*32+18) /* Direct Cache Access */
++#define X86_FEATURE_XMM4_1            ( 4*32+19) /* "sse4_1" SSE-4.1 */
++#define X86_FEATURE_XMM4_2            ( 4*32+20) /* "sse4_2" SSE-4.2 */
++#define X86_FEATURE_X2APIC            ( 4*32+21) /* x2APIC */
++#define X86_FEATURE_MOVBE             ( 4*32+22) /* MOVBE instruction */
++#define X86_FEATURE_POPCNT            ( 4*32+23) /* POPCNT instruction */
+ #define X86_FEATURE_TSC_DEADLINE_TIMER        ( 4*32+24) /* Tsc deadline timer */
+-#define X86_FEATURE_AES               ( 4*32+25) /* AES instructions */
+-#define X86_FEATURE_XSAVE     ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+-#define X86_FEATURE_OSXSAVE   ( 4*32+27) /* "" XSAVE enabled in the OS */
+-#define X86_FEATURE_AVX               ( 4*32+28) /* Advanced Vector Extensions */
+-#define X86_FEATURE_F16C      ( 4*32+29) /* 16-bit fp conversions */
+-#define X86_FEATURE_RDRAND    ( 4*32+30) /* The RDRAND instruction */
+-#define X86_FEATURE_HYPERVISOR        ( 4*32+31) /* Running on a hypervisor */
++#define X86_FEATURE_AES                       ( 4*32+25) /* AES instructions */
++#define X86_FEATURE_XSAVE             ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
++#define X86_FEATURE_OSXSAVE           ( 4*32+27) /* "" XSAVE enabled in the OS */
++#define X86_FEATURE_AVX                       ( 4*32+28) /* Advanced Vector Extensions */
++#define X86_FEATURE_F16C              ( 4*32+29) /* 16-bit fp conversions */
++#define X86_FEATURE_RDRAND            ( 4*32+30) /* The RDRAND instruction */
++#define X86_FEATURE_HYPERVISOR                ( 4*32+31) /* Running on a hypervisor */
+ 
+ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+-#define X86_FEATURE_XSTORE    ( 5*32+ 2) /* "rng" RNG present (xstore) */
+-#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
+-#define X86_FEATURE_XCRYPT    ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+-#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
+-#define X86_FEATURE_ACE2      ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
+-#define X86_FEATURE_ACE2_EN   ( 5*32+ 9) /* ACE v2 enabled */
+-#define X86_FEATURE_PHE               ( 5*32+10) /* PadLock Hash Engine */
+-#define X86_FEATURE_PHE_EN    ( 5*32+11) /* PHE enabled */
+-#define X86_FEATURE_PMM               ( 5*32+12) /* PadLock Montgomery Multiplier */
+-#define X86_FEATURE_PMM_EN    ( 5*32+13) /* PMM enabled */
++#define X86_FEATURE_XSTORE            ( 5*32+ 2) /* "rng" RNG present (xstore) */
++#define X86_FEATURE_XSTORE_EN         ( 5*32+ 3) /* "rng_en" RNG enabled */
++#define X86_FEATURE_XCRYPT            ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
++#define X86_FEATURE_XCRYPT_EN         ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
++#define X86_FEATURE_ACE2              ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
++#define X86_FEATURE_ACE2_EN           ( 5*32+ 9) /* ACE v2 enabled */
++#define X86_FEATURE_PHE                       ( 5*32+10) /* PadLock Hash Engine */
++#define X86_FEATURE_PHE_EN            ( 5*32+11) /* PHE enabled */
++#define X86_FEATURE_PMM                       ( 5*32+12) /* PadLock Montgomery Multiplier */
++#define X86_FEATURE_PMM_EN            ( 5*32+13) /* PMM enabled */
+ 
+ /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
+-#define X86_FEATURE_LAHF_LM   ( 6*32+ 0) /* LAHF/SAHF in long mode */
+-#define X86_FEATURE_CMP_LEGACY        ( 6*32+ 1) /* If yes HyperThreading not valid */
+-#define X86_FEATURE_SVM               ( 6*32+ 2) /* Secure virtual machine */
+-#define X86_FEATURE_EXTAPIC   ( 6*32+ 3) /* Extended APIC space */
+-#define X86_FEATURE_CR8_LEGACY        ( 6*32+ 4) /* CR8 in 32-bit mode */
+-#define X86_FEATURE_ABM               ( 6*32+ 5) /* Advanced bit manipulation */
+-#define X86_FEATURE_SSE4A     ( 6*32+ 6) /* SSE-4A */
+-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
+-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
+-#define X86_FEATURE_OSVW      ( 6*32+ 9) /* OS Visible Workaround */
+-#define X86_FEATURE_IBS               ( 6*32+10) /* Instruction Based Sampling */
+-#define X86_FEATURE_XOP               ( 6*32+11) /* extended AVX instructions */
+-#define X86_FEATURE_SKINIT    ( 6*32+12) /* SKINIT/STGI instructions */
+-#define X86_FEATURE_WDT               ( 6*32+13) /* Watchdog timer */
+-#define X86_FEATURE_LWP               ( 6*32+15) /* Light Weight Profiling */
+-#define X86_FEATURE_FMA4      ( 6*32+16) /* 4 operands MAC instructions */
+-#define X86_FEATURE_TCE               ( 6*32+17) /* translation cache extension */
+-#define X86_FEATURE_NODEID_MSR        ( 6*32+19) /* NodeId MSR */
+-#define X86_FEATURE_TBM               ( 6*32+21) /* trailing bit manipulations */
+-#define X86_FEATURE_TOPOEXT   ( 6*32+22) /* topology extensions CPUID leafs */
+-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
+-#define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
+-#define X86_FEATURE_BPEXT     (6*32+26) /* data breakpoint extension */
+-#define X86_FEATURE_PTSC      ( 6*32+27) /* performance time-stamp counter */
+-#define X86_FEATURE_PERFCTR_L2        ( 6*32+28) /* L2 performance counter extensions */
+-#define X86_FEATURE_MWAITX    ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
++#define X86_FEATURE_LAHF_LM           ( 6*32+ 0) /* LAHF/SAHF in long mode */
++#define X86_FEATURE_CMP_LEGACY                ( 6*32+ 1) /* If yes HyperThreading not valid */
++#define X86_FEATURE_SVM                       ( 6*32+ 2) /* Secure virtual machine */
++#define X86_FEATURE_EXTAPIC           ( 6*32+ 3) /* Extended APIC space */
++#define X86_FEATURE_CR8_LEGACY                ( 6*32+ 4) /* CR8 in 32-bit mode */
++#define X86_FEATURE_ABM                       ( 6*32+ 5) /* Advanced bit manipulation */
++#define X86_FEATURE_SSE4A             ( 6*32+ 6) /* SSE-4A */
++#define X86_FEATURE_MISALIGNSSE               ( 6*32+ 7) /* Misaligned SSE mode */
++#define X86_FEATURE_3DNOWPREFETCH     ( 6*32+ 8) /* 3DNow prefetch instructions */
++#define X86_FEATURE_OSVW              ( 6*32+ 9) /* OS Visible Workaround */
++#define X86_FEATURE_IBS                       ( 6*32+10) /* Instruction Based Sampling */
++#define X86_FEATURE_XOP                       ( 6*32+11) /* extended AVX instructions */
++#define X86_FEATURE_SKINIT            ( 6*32+12) /* SKINIT/STGI instructions */
++#define X86_FEATURE_WDT                       ( 6*32+13) /* Watchdog timer */
++#define X86_FEATURE_LWP                       ( 6*32+15) /* Light Weight Profiling */
++#define X86_FEATURE_FMA4              ( 6*32+16) /* 4 operands MAC instructions */
++#define X86_FEATURE_TCE                       ( 6*32+17) /* translation cache extension */
++#define X86_FEATURE_NODEID_MSR                ( 6*32+19) /* NodeId MSR */
++#define X86_FEATURE_TBM                       ( 6*32+21) /* trailing bit manipulations */
++#define X86_FEATURE_TOPOEXT           ( 6*32+22) /* topology extensions CPUID leafs */
++#define X86_FEATURE_PERFCTR_CORE      ( 6*32+23) /* core performance counter extensions */
++#define X86_FEATURE_PERFCTR_NB                ( 6*32+24) /* NB performance counter extensions */
++#define X86_FEATURE_BPEXT             (6*32+26) /* data breakpoint extension */
++#define X86_FEATURE_PTSC              ( 6*32+27) /* performance time-stamp counter */
++#define X86_FEATURE_PERFCTR_L2                ( 6*32+28) /* Last Level Cache performance counter extensions */
++#define X86_FEATURE_MWAITX            ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
+ 
+ /*
+  * Auxiliary flags: Linux defined - For features scattered in various
+@@ -191,150 +191,152 @@
+  *
+  * Reuse free bits when adding new feature flags!
+  */
+-#define X86_FEATURE_RING3MWAIT        ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
+-#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
+-#define X86_FEATURE_CPB               ( 7*32+ 2) /* AMD Core Performance Boost */
+-#define X86_FEATURE_EPB               ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+-#define X86_FEATURE_CAT_L3    ( 7*32+ 4) /* Cache Allocation Technology L3 */
+-#define X86_FEATURE_CAT_L2    ( 7*32+ 5) /* Cache Allocation Technology L2 */
+-#define X86_FEATURE_CDP_L3    ( 7*32+ 6) /* Code and Data Prioritization L3 */
++#define X86_FEATURE_RING3MWAIT                ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
++#define X86_FEATURE_CPUID_FAULT               ( 7*32+ 1) /* Intel CPUID faulting */
++#define X86_FEATURE_CPB                       ( 7*32+ 2) /* AMD Core Performance Boost */
++#define X86_FEATURE_EPB                       ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
++#define X86_FEATURE_CAT_L3            ( 7*32+ 4) /* Cache Allocation Technology L3 */
++#define X86_FEATURE_CAT_L2            ( 7*32+ 5) /* Cache Allocation Technology L2 */
++#define X86_FEATURE_CDP_L3            ( 7*32+ 6) /* Code and Data Prioritization L3 */
+ 
+-#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
+-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
++#define X86_FEATURE_HW_PSTATE         ( 7*32+ 8) /* AMD HW-PState */
++#define X86_FEATURE_PROC_FEEDBACK     ( 7*32+ 9) /* AMD ProcFeedbackInterface */
++#define X86_FEATURE_SME                       ( 7*32+10) /* AMD Secure Memory Encryption */
+ 
+-#define X86_FEATURE_INTEL_PPIN        ( 7*32+14) /* Intel Processor Inventory Number */
+-#define X86_FEATURE_INTEL_PT  ( 7*32+15) /* Intel Processor Trace */
+-#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+-#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
++#define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
++#define X86_FEATURE_INTEL_PT          ( 7*32+15) /* Intel Processor Trace */
++#define X86_FEATURE_AVX512_4VNNIW     (7*32+16) /* AVX-512 Neural Network Instructions */
++#define X86_FEATURE_AVX512_4FMAPS     (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+ 
+-#define X86_FEATURE_MBA         ( 7*32+18) /* Memory Bandwidth Allocation */
++#define X86_FEATURE_MBA                       ( 7*32+18) /* Memory Bandwidth Allocation */
+ 
+ /* Virtualization flags: Linux defined, word 8 */
+-#define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
+-#define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
+-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
+-#define X86_FEATURE_EPT         ( 8*32+ 3) /* Intel Extended Page Table */
+-#define X86_FEATURE_VPID        ( 8*32+ 4) /* Intel Virtual Processor ID */
++#define X86_FEATURE_TPR_SHADOW                ( 8*32+ 0) /* Intel TPR Shadow */
++#define X86_FEATURE_VNMI              ( 8*32+ 1) /* Intel Virtual NMI */
++#define X86_FEATURE_FLEXPRIORITY      ( 8*32+ 2) /* Intel FlexPriority */
++#define X86_FEATURE_EPT                       ( 8*32+ 3) /* Intel Extended Page Table */
++#define X86_FEATURE_VPID              ( 8*32+ 4) /* Intel Virtual Processor ID */
+ 
+-#define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
+-#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
++#define X86_FEATURE_VMMCALL           ( 8*32+15) /* Prefer vmmcall to vmcall */
++#define X86_FEATURE_XENPV             ( 8*32+16) /* "" Xen paravirtual guest */
+ 
+ 
+ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
+-#define X86_FEATURE_FSGSBASE  ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+-#define X86_FEATURE_TSC_ADJUST        ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
+-#define X86_FEATURE_BMI1      ( 9*32+ 3) /* 1st group bit manipulation extensions */
+-#define X86_FEATURE_HLE               ( 9*32+ 4) /* Hardware Lock Elision */
+-#define X86_FEATURE_AVX2      ( 9*32+ 5) /* AVX2 instructions */
+-#define X86_FEATURE_SMEP      ( 9*32+ 7) /* Supervisor Mode Execution Protection */
+-#define X86_FEATURE_BMI2      ( 9*32+ 8) /* 2nd group bit manipulation extensions */
+-#define X86_FEATURE_ERMS      ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+-#define X86_FEATURE_INVPCID   ( 9*32+10) /* Invalidate Processor Context ID */
+-#define X86_FEATURE_RTM               ( 9*32+11) /* Restricted Transactional Memory */
+-#define X86_FEATURE_CQM               ( 9*32+12) /* Cache QoS Monitoring */
+-#define X86_FEATURE_MPX               ( 9*32+14) /* Memory Protection Extension */
+-#define X86_FEATURE_RDT_A     ( 9*32+15) /* Resource Director Technology Allocation */
+-#define X86_FEATURE_AVX512F   ( 9*32+16) /* AVX-512 Foundation */
+-#define X86_FEATURE_AVX512DQ  ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
+-#define X86_FEATURE_RDSEED    ( 9*32+18) /* The RDSEED instruction */
+-#define X86_FEATURE_ADX               ( 9*32+19) /* The ADCX and ADOX instructions */
+-#define X86_FEATURE_SMAP      ( 9*32+20) /* Supervisor Mode Access Prevention */
+-#define X86_FEATURE_AVX512IFMA  ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
+-#define X86_FEATURE_CLFLUSHOPT        ( 9*32+23) /* CLFLUSHOPT instruction */
+-#define X86_FEATURE_CLWB      ( 9*32+24) /* CLWB instruction */
+-#define X86_FEATURE_AVX512PF  ( 9*32+26) /* AVX-512 Prefetch */
+-#define X86_FEATURE_AVX512ER  ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+-#define X86_FEATURE_AVX512CD  ( 9*32+28) /* AVX-512 Conflict Detection */
+-#define X86_FEATURE_SHA_NI    ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
+-#define X86_FEATURE_AVX512BW  ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
+-#define X86_FEATURE_AVX512VL  ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
++#define X86_FEATURE_FSGSBASE          ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
++#define X86_FEATURE_TSC_ADJUST                ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
++#define X86_FEATURE_BMI1              ( 9*32+ 3) /* 1st group bit manipulation extensions */
++#define X86_FEATURE_HLE                       ( 9*32+ 4) /* Hardware Lock Elision */
++#define X86_FEATURE_AVX2              ( 9*32+ 5) /* AVX2 instructions */
++#define X86_FEATURE_SMEP              ( 9*32+ 7) /* Supervisor Mode Execution Protection */
++#define X86_FEATURE_BMI2              ( 9*32+ 8) /* 2nd group bit manipulation extensions */
++#define X86_FEATURE_ERMS              ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
++#define X86_FEATURE_INVPCID           ( 9*32+10) /* Invalidate Processor Context ID */
++#define X86_FEATURE_RTM                       ( 9*32+11) /* Restricted Transactional Memory */
++#define X86_FEATURE_CQM                       ( 9*32+12) /* Cache QoS Monitoring */
++#define X86_FEATURE_MPX                       ( 9*32+14) /* Memory Protection Extension */
++#define X86_FEATURE_RDT_A             ( 9*32+15) /* Resource Director Technology Allocation */
++#define X86_FEATURE_AVX512F           ( 9*32+16) /* AVX-512 Foundation */
++#define X86_FEATURE_AVX512DQ          ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
++#define X86_FEATURE_RDSEED            ( 9*32+18) /* The RDSEED instruction */
++#define X86_FEATURE_ADX                       ( 9*32+19) /* The ADCX and ADOX instructions */
++#define X86_FEATURE_SMAP              ( 9*32+20) /* Supervisor Mode Access Prevention */
++#define X86_FEATURE_AVX512IFMA                ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
++#define X86_FEATURE_CLFLUSHOPT                ( 9*32+23) /* CLFLUSHOPT instruction */
++#define X86_FEATURE_CLWB              ( 9*32+24) /* CLWB instruction */
++#define X86_FEATURE_AVX512PF          ( 9*32+26) /* AVX-512 Prefetch */
++#define X86_FEATURE_AVX512ER          ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
++#define X86_FEATURE_AVX512CD          ( 9*32+28) /* AVX-512 Conflict Detection */
++#define X86_FEATURE_SHA_NI            ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
++#define X86_FEATURE_AVX512BW          ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
++#define X86_FEATURE_AVX512VL          ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+ 
+ /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
+-#define X86_FEATURE_XSAVEOPT  (10*32+ 0) /* XSAVEOPT */
+-#define X86_FEATURE_XSAVEC    (10*32+ 1) /* XSAVEC */
+-#define X86_FEATURE_XGETBV1   (10*32+ 2) /* XGETBV with ECX = 1 */
+-#define X86_FEATURE_XSAVES    (10*32+ 3) /* XSAVES/XRSTORS */
++#define X86_FEATURE_XSAVEOPT          (10*32+ 0) /* XSAVEOPT */
++#define X86_FEATURE_XSAVEC            (10*32+ 1) /* XSAVEC */
++#define X86_FEATURE_XGETBV1           (10*32+ 2) /* XGETBV with ECX = 1 */
++#define X86_FEATURE_XSAVES            (10*32+ 3) /* XSAVES/XRSTORS */
+ 
+ /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
+-#define X86_FEATURE_CQM_LLC   (11*32+ 1) /* LLC QoS if 1 */
++#define X86_FEATURE_CQM_LLC           (11*32+ 1) /* LLC QoS if 1 */
+ 
+ /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+-#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
+-#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
++#define X86_FEATURE_CQM_OCCUP_LLC     (12*32+ 0) /* LLC occupancy monitoring if 1 */
++#define X86_FEATURE_CQM_MBM_TOTAL     (12*32+ 1) /* LLC Total MBM monitoring */
++#define X86_FEATURE_CQM_MBM_LOCAL     (12*32+ 2) /* LLC Local MBM monitoring */
+ 
+ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+-#define X86_FEATURE_CLZERO    (13*32+0) /* CLZERO instruction */
+-#define X86_FEATURE_IRPERF    (13*32+1) /* Instructions Retired Count */
++#define X86_FEATURE_CLZERO            (13*32+0) /* CLZERO instruction */
++#define X86_FEATURE_IRPERF            (13*32+1) /* Instructions Retired Count */
+ 
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+-#define X86_FEATURE_DTHERM    (14*32+ 0) /* Digital Thermal Sensor */
+-#define X86_FEATURE_IDA               (14*32+ 1) /* Intel Dynamic Acceleration */
+-#define X86_FEATURE_ARAT      (14*32+ 2) /* Always Running APIC Timer */
+-#define X86_FEATURE_PLN               (14*32+ 4) /* Intel Power Limit Notification */
+-#define X86_FEATURE_PTS               (14*32+ 6) /* Intel Package Thermal Status */
+-#define X86_FEATURE_HWP               (14*32+ 7) /* Intel Hardware P-states */
+-#define X86_FEATURE_HWP_NOTIFY        (14*32+ 8) /* HWP Notification */
+-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
+-#define X86_FEATURE_HWP_EPP   (14*32+10) /* HWP Energy Perf. Preference */
+-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
++#define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
++#define X86_FEATURE_IDA                       (14*32+ 1) /* Intel Dynamic Acceleration */
++#define X86_FEATURE_ARAT              (14*32+ 2) /* Always Running APIC Timer */
++#define X86_FEATURE_PLN                       (14*32+ 4) /* Intel Power Limit Notification */
++#define X86_FEATURE_PTS                       (14*32+ 6) /* Intel Package Thermal Status */
++#define X86_FEATURE_HWP                       (14*32+ 7) /* Intel Hardware P-states */
++#define X86_FEATURE_HWP_NOTIFY                (14*32+ 8) /* HWP Notification */
++#define X86_FEATURE_HWP_ACT_WINDOW    (14*32+ 9) /* HWP Activity Window */
++#define X86_FEATURE_HWP_EPP           (14*32+10) /* HWP Energy Perf. Preference */
++#define X86_FEATURE_HWP_PKG_REQ               (14*32+11) /* HWP Package Level Request */
+ 
+ /* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
+-#define X86_FEATURE_NPT               (15*32+ 0) /* Nested Page Table support */
+-#define X86_FEATURE_LBRV      (15*32+ 1) /* LBR Virtualization support */
+-#define X86_FEATURE_SVML      (15*32+ 2) /* "svm_lock" SVM locking MSR */
+-#define X86_FEATURE_NRIPS     (15*32+ 3) /* "nrip_save" SVM next_rip save */
+-#define X86_FEATURE_TSCRATEMSR  (15*32+ 4) /* "tsc_scale" TSC scaling support */
+-#define X86_FEATURE_VMCBCLEAN   (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
+-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
+-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
+-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
+-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
+-#define X86_FEATURE_AVIC      (15*32+13) /* Virtual Interrupt Controller */
+-#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
++#define X86_FEATURE_NPT                       (15*32+ 0) /* Nested Page Table support */
++#define X86_FEATURE_LBRV              (15*32+ 1) /* LBR Virtualization support */
++#define X86_FEATURE_SVML              (15*32+ 2) /* "svm_lock" SVM locking MSR */
++#define X86_FEATURE_NRIPS             (15*32+ 3) /* "nrip_save" SVM next_rip save */
++#define X86_FEATURE_TSCRATEMSR                (15*32+ 4) /* "tsc_scale" TSC scaling support */
++#define X86_FEATURE_VMCBCLEAN         (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
++#define X86_FEATURE_FLUSHBYASID               (15*32+ 6) /* flush-by-ASID support */
++#define X86_FEATURE_DECODEASSISTS     (15*32+ 7) /* Decode Assists support */
++#define X86_FEATURE_PAUSEFILTER               (15*32+10) /* filtered pause intercept */
++#define X86_FEATURE_PFTHRESHOLD               (15*32+12) /* pause filter threshold */
++#define X86_FEATURE_AVIC              (15*32+13) /* Virtual Interrupt Controller */
++#define X86_FEATURE_V_VMSAVE_VMLOAD   (15*32+15) /* Virtual VMSAVE VMLOAD */
++#define X86_FEATURE_VGIF              (15*32+16) /* Virtual GIF */
+ 
+ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
+-#define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
+-#define X86_FEATURE_PKU               (16*32+ 3) /* Protection Keys for Userspace */
+-#define X86_FEATURE_OSPKE     (16*32+ 4) /* OS Protection Keys Enable */
+-#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+-#define X86_FEATURE_GFNI      (16*32+ 8) /* Galois Field New Instructions */
+-#define X86_FEATURE_VAES      (16*32+ 9) /* Vector AES */
+-#define X86_FEATURE_VPCLMULQDQ        (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
+-#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */
+-#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
+-#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
+-#define X86_FEATURE_LA57      (16*32+16) /* 5-level page tables */
+-#define X86_FEATURE_RDPID     (16*32+22) /* RDPID instruction */
++#define X86_FEATURE_AVX512VBMI                (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
++#define X86_FEATURE_PKU                       (16*32+ 3) /* Protection Keys for Userspace */
++#define X86_FEATURE_OSPKE             (16*32+ 4) /* OS Protection Keys Enable */
++#define X86_FEATURE_AVX512_VBMI2      (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
++#define X86_FEATURE_GFNI              (16*32+ 8) /* Galois Field New Instructions */
++#define X86_FEATURE_VAES              (16*32+ 9) /* Vector AES */
++#define X86_FEATURE_VPCLMULQDQ                (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
++#define X86_FEATURE_AVX512_VNNI               (16*32+ 11) /* Vector Neural Network Instructions */
++#define X86_FEATURE_AVX512_BITALG     (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
++#define X86_FEATURE_AVX512_VPOPCNTDQ  (16*32+14) /* POPCNT for vectors of DW/QW */
++#define X86_FEATURE_LA57              (16*32+16) /* 5-level page tables */
++#define X86_FEATURE_RDPID             (16*32+22) /* RDPID instruction */
+ 
+ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
+-#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
+-#define X86_FEATURE_SUCCOR    (17*32+1) /* Uncorrectable error containment and recovery */
+-#define X86_FEATURE_SMCA      (17*32+3) /* Scalable MCA */
++#define X86_FEATURE_OVERFLOW_RECOV    (17*32+0) /* MCA overflow recovery support */
++#define X86_FEATURE_SUCCOR            (17*32+1) /* Uncorrectable error containment and recovery */
++#define X86_FEATURE_SMCA              (17*32+3) /* Scalable MCA */
+ 
+ /*
+  * BUG word(s)
+  */
+-#define X86_BUG(x)            (NCAPINTS*32 + (x))
++#define X86_BUG(x)                    (NCAPINTS*32 + (x))
+ 
+-#define X86_BUG_F00F          X86_BUG(0) /* Intel F00F */
+-#define X86_BUG_FDIV          X86_BUG(1) /* FPU FDIV */
+-#define X86_BUG_COMA          X86_BUG(2) /* Cyrix 6x86 coma */
+-#define X86_BUG_AMD_TLB_MMATCH        X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
+-#define X86_BUG_AMD_APIC_C1E  X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
+-#define X86_BUG_11AP          X86_BUG(5) /* Bad local APIC aka 11AP */
+-#define X86_BUG_FXSAVE_LEAK   X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
+-#define X86_BUG_CLFLUSH_MONITOR       X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+-#define X86_BUG_SYSRET_SS_ATTRS       X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
++#define X86_BUG_F00F                  X86_BUG(0) /* Intel F00F */
++#define X86_BUG_FDIV                  X86_BUG(1) /* FPU FDIV */
++#define X86_BUG_COMA                  X86_BUG(2) /* Cyrix 6x86 coma */
++#define X86_BUG_AMD_TLB_MMATCH                X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
++#define X86_BUG_AMD_APIC_C1E          X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
++#define X86_BUG_11AP                  X86_BUG(5) /* Bad local APIC aka 11AP */
++#define X86_BUG_FXSAVE_LEAK           X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
++#define X86_BUG_CLFLUSH_MONITOR               X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
++#define X86_BUG_SYSRET_SS_ATTRS               X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+ #ifdef CONFIG_X86_32
+ /*
+  * 64-bit kernels don't use X86_BUG_ESPFIX.  Make the define conditional
+  * to avoid confusion.
+  */
+-#define X86_BUG_ESPFIX                X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
++#define X86_BUG_ESPFIX                        X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+ #endif
+-#define X86_BUG_NULL_SEG      X86_BUG(10) /* Nulling a selector preserves the base */
+-#define X86_BUG_SWAPGS_FENCE  X86_BUG(11) /* SWAPGS without input dep on GS */
+-#define X86_BUG_MONITOR               X86_BUG(12) /* IPI required to wake up remote CPU */
+-#define X86_BUG_AMD_E400      X86_BUG(13) /* CPU is among the affected by Erratum 400 */
++#define X86_BUG_NULL_SEG              X86_BUG(10) /* Nulling a selector preserves the base */
++#define X86_BUG_SWAPGS_FENCE          X86_BUG(11) /* SWAPGS without input dep on GS */
++#define X86_BUG_MONITOR                       X86_BUG(12) /* IPI required to wake up remote CPU */
++#define X86_BUG_AMD_E400              X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+ #endif /* _ASM_X86_CPUFEATURES_H */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0114-selftests-x86-protection_keys-Fix-syscall-NR-redefin.patch b/patches/kernel/0114-selftests-x86-protection_keys-Fix-syscall-NR-redefin.patch

deleted file mode 100644 (file)

index 810e63a..0000000
--- a/patches/kernel/0114-selftests-x86-protection_keys-Fix-syscall-NR-redefin.patch
+++ /dev/null
@@ -1,77 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sat, 4 Nov 2017 04:19:48 -0700
-Subject: [PATCH] selftests/x86/protection_keys: Fix syscall NR redefinition
- warnings
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-On new enough glibc, the pkey syscalls numbers are available.  Check
-first before defining them to avoid warnings like:
-
-protection_keys.c:198:0: warning: "SYS_pkey_alloc" redefined
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: stable@vger.kernel.org
-Link: http://lkml.kernel.org/r/1fbef53a9e6befb7165ff855fc1a7d4788a191d6.1509794321.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 693cb5580fdb026922363aa103add64b3ecd572e)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 64c8ec4beb84ca8b0ff3250a8b6044d06be6315b)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- tools/testing/selftests/x86/protection_keys.c | 24 ++++++++++++++++++------
- 1 file changed, 18 insertions(+), 6 deletions(-)
-
-diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
-index 3237bc010e1c..3c54d5c40952 100644
---- a/tools/testing/selftests/x86/protection_keys.c
-+++ b/tools/testing/selftests/x86/protection_keys.c
-@@ -188,17 +188,29 @@ void lots_o_noops_around_write(int *write_to_me)
- #define u64 uint64_t
- 
- #ifdef __i386__
--#define SYS_mprotect_key 380
--#define SYS_pkey_alloc         381
--#define SYS_pkey_free  382
-+
-+#ifndef SYS_mprotect_key
-+# define SYS_mprotect_key 380
-+#endif
-+#ifndef SYS_pkey_alloc
-+# define SYS_pkey_alloc        381
-+# define SYS_pkey_free         382
-+#endif
- #define REG_IP_IDX REG_EIP
- #define si_pkey_offset 0x14
-+
- #else
--#define SYS_mprotect_key 329
--#define SYS_pkey_alloc         330
--#define SYS_pkey_free  331
-+
-+#ifndef SYS_mprotect_key
-+# define SYS_mprotect_key 329
-+#endif
-+#ifndef SYS_pkey_alloc
-+# define SYS_pkey_alloc        330
-+# define SYS_pkey_free         331
-+#endif
- #define REG_IP_IDX REG_RIP
- #define si_pkey_offset 0x20
-+
- #endif
- 
- void dump_mem(void *dumpme, int len_bytes)
--- 
-2.14.2
-
diff --git a/patches/kernel/0114-x86-cpufeatures-Fix-various-details-in-the-feature-d.patch b/patches/kernel/0114-x86-cpufeatures-Fix-various-details-in-the-feature-d.patch

new file mode 100644 (file)

index 0000000..0b12f37
--- /dev/null
+++ b/patches/kernel/0114-x86-cpufeatures-Fix-various-details-in-the-feature-d.patch
@@ -0,0 +1,369 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Ingo Molnar <mingo@kernel.org>
+Date: Tue, 31 Oct 2017 13:17:23 +0100
+Subject: [PATCH] x86/cpufeatures: Fix various details in the feature
+ definitions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Kept this commit separate from the re-tabulation changes, to make
+the changes easier to review:
+
+ - add better explanation for entries with no explanation
+ - fix/enhance the text of some of the entries
+ - fix the vertical alignment of some of the feature number definitions
+ - fix inconsistent capitalization
+ - ... and lots of other small details
+
+i.e. make it all more of a coherent unit, instead of a patchwork of years of additions.
+
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20171031121723.28524-4-mingo@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit f3a624e901c633593156f7b00ca743a6204a29bc)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 256c600cf0edb23ea5f2d70e7da091c909f5ace6)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/cpufeatures.h | 149 ++++++++++++++++++-------------------
+ 1 file changed, 74 insertions(+), 75 deletions(-)
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index a021b0756af6..6db782ed9cdb 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -19,14 +19,12 @@
+  * Note: If the comment begins with a quoted string, that string is used
+  * in /proc/cpuinfo instead of the macro name.  If the string is "",
+  * this feature bit is not displayed in /proc/cpuinfo at all.
+- */
+-
+-/*
++ *
+  * When adding new features here that depend on other features,
+- * please update the table in kernel/cpu/cpuid-deps.c
++ * please update the table in kernel/cpu/cpuid-deps.c as well.
+  */
+ 
+-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
++/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
+ #define X86_FEATURE_FPU                       ( 0*32+ 0) /* Onboard FPU */
+ #define X86_FEATURE_VME                       ( 0*32+ 1) /* Virtual Mode Extensions */
+ #define X86_FEATURE_DE                        ( 0*32+ 2) /* Debugging Extensions */
+@@ -41,8 +39,7 @@
+ #define X86_FEATURE_MTRR              ( 0*32+12) /* Memory Type Range Registers */
+ #define X86_FEATURE_PGE                       ( 0*32+13) /* Page Global Enable */
+ #define X86_FEATURE_MCA                       ( 0*32+14) /* Machine Check Architecture */
+-#define X86_FEATURE_CMOV              ( 0*32+15) /* CMOV instructions */
+-                                        /* (plus FCMOVcc, FCOMI with FPU) */
++#define X86_FEATURE_CMOV              ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
+ #define X86_FEATURE_PAT                       ( 0*32+16) /* Page Attribute Table */
+ #define X86_FEATURE_PSE36             ( 0*32+17) /* 36-bit PSEs */
+ #define X86_FEATURE_PN                        ( 0*32+18) /* Processor serial number */
+@@ -62,15 +59,15 @@
+ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+ /* Don't duplicate feature flags which are redundant with Intel! */
+ #define X86_FEATURE_SYSCALL           ( 1*32+11) /* SYSCALL/SYSRET */
+-#define X86_FEATURE_MP                        ( 1*32+19) /* MP Capable. */
++#define X86_FEATURE_MP                        ( 1*32+19) /* MP Capable */
+ #define X86_FEATURE_NX                        ( 1*32+20) /* Execute Disable */
+ #define X86_FEATURE_MMXEXT            ( 1*32+22) /* AMD MMX extensions */
+ #define X86_FEATURE_FXSR_OPT          ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+ #define X86_FEATURE_GBPAGES           ( 1*32+26) /* "pdpe1gb" GB pages */
+ #define X86_FEATURE_RDTSCP            ( 1*32+27) /* RDTSCP */
+-#define X86_FEATURE_LM                        ( 1*32+29) /* Long Mode (x86-64) */
+-#define X86_FEATURE_3DNOWEXT          ( 1*32+30) /* AMD 3DNow! extensions */
+-#define X86_FEATURE_3DNOW             ( 1*32+31) /* 3DNow! */
++#define X86_FEATURE_LM                        ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
++#define X86_FEATURE_3DNOWEXT          ( 1*32+30) /* AMD 3DNow extensions */
++#define X86_FEATURE_3DNOW             ( 1*32+31) /* 3DNow */
+ 
+ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+ #define X86_FEATURE_RECOVERY          ( 2*32+ 0) /* CPU in recovery mode */
+@@ -83,66 +80,67 @@
+ #define X86_FEATURE_K6_MTRR           ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+ #define X86_FEATURE_CYRIX_ARR         ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+ #define X86_FEATURE_CENTAUR_MCR               ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
+-/* cpu types for specific tunings: */
++
++/* CPU types for specific tunings: */
+ #define X86_FEATURE_K8                        ( 3*32+ 4) /* "" Opteron, Athlon64 */
+ #define X86_FEATURE_K7                        ( 3*32+ 5) /* "" Athlon */
+ #define X86_FEATURE_P3                        ( 3*32+ 6) /* "" P3 */
+ #define X86_FEATURE_P4                        ( 3*32+ 7) /* "" P4 */
+ #define X86_FEATURE_CONSTANT_TSC      ( 3*32+ 8) /* TSC ticks at a constant rate */
+-#define X86_FEATURE_UP                        ( 3*32+ 9) /* smp kernel running on up */
+-#define X86_FEATURE_ART                       ( 3*32+10) /* Platform has always running timer (ART) */
++#define X86_FEATURE_UP                        ( 3*32+ 9) /* SMP kernel running on UP */
++#define X86_FEATURE_ART                       ( 3*32+10) /* Always running timer (ART) */
+ #define X86_FEATURE_ARCH_PERFMON      ( 3*32+11) /* Intel Architectural PerfMon */
+ #define X86_FEATURE_PEBS              ( 3*32+12) /* Precise-Event Based Sampling */
+ #define X86_FEATURE_BTS                       ( 3*32+13) /* Branch Trace Store */
+-#define X86_FEATURE_SYSCALL32         ( 3*32+14) /* "" syscall in ia32 userspace */
+-#define X86_FEATURE_SYSENTER32                ( 3*32+15) /* "" sysenter in ia32 userspace */
+-#define X86_FEATURE_REP_GOOD          ( 3*32+16) /* rep microcode works well */
+-#define X86_FEATURE_MFENCE_RDTSC      ( 3*32+17) /* "" Mfence synchronizes RDTSC */
+-#define X86_FEATURE_LFENCE_RDTSC      ( 3*32+18) /* "" Lfence synchronizes RDTSC */
++#define X86_FEATURE_SYSCALL32         ( 3*32+14) /* "" syscall in IA32 userspace */
++#define X86_FEATURE_SYSENTER32                ( 3*32+15) /* "" sysenter in IA32 userspace */
++#define X86_FEATURE_REP_GOOD          ( 3*32+16) /* REP microcode works well */
++#define X86_FEATURE_MFENCE_RDTSC      ( 3*32+17) /* "" MFENCE synchronizes RDTSC */
++#define X86_FEATURE_LFENCE_RDTSC      ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
+ #define X86_FEATURE_ACC_POWER         ( 3*32+19) /* AMD Accumulated Power Mechanism */
+ #define X86_FEATURE_NOPL              ( 3*32+20) /* The NOPL (0F 1F) instructions */
+ #define X86_FEATURE_ALWAYS            ( 3*32+21) /* "" Always-present feature */
+-#define X86_FEATURE_XTOPOLOGY         ( 3*32+22) /* cpu topology enum extensions */
++#define X86_FEATURE_XTOPOLOGY         ( 3*32+22) /* CPU topology enum extensions */
+ #define X86_FEATURE_TSC_RELIABLE      ( 3*32+23) /* TSC is known to be reliable */
+ #define X86_FEATURE_NONSTOP_TSC               ( 3*32+24) /* TSC does not stop in C states */
+ #define X86_FEATURE_CPUID             ( 3*32+25) /* CPU has CPUID instruction itself */
+-#define X86_FEATURE_EXTD_APICID               ( 3*32+26) /* has extended APICID (8 bits) */
+-#define X86_FEATURE_AMD_DCM           ( 3*32+27) /* multi-node processor */
+-#define X86_FEATURE_APERFMPERF                ( 3*32+28) /* APERFMPERF */
++#define X86_FEATURE_EXTD_APICID               ( 3*32+26) /* Extended APICID (8 bits) */
++#define X86_FEATURE_AMD_DCM           ( 3*32+27) /* AMD multi-node processor */
++#define X86_FEATURE_APERFMPERF                ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+ #define X86_FEATURE_NONSTOP_TSC_S3    ( 3*32+30) /* TSC doesn't stop in S3 state */
+ #define X86_FEATURE_TSC_KNOWN_FREQ    ( 3*32+31) /* TSC has known frequency */
+ 
+-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
++/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
+ #define X86_FEATURE_XMM3              ( 4*32+ 0) /* "pni" SSE-3 */
+ #define X86_FEATURE_PCLMULQDQ         ( 4*32+ 1) /* PCLMULQDQ instruction */
+ #define X86_FEATURE_DTES64            ( 4*32+ 2) /* 64-bit Debug Store */
+-#define X86_FEATURE_MWAIT             ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
+-#define X86_FEATURE_DSCPL             ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
++#define X86_FEATURE_MWAIT             ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
++#define X86_FEATURE_DSCPL             ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
+ #define X86_FEATURE_VMX                       ( 4*32+ 5) /* Hardware virtualization */
+-#define X86_FEATURE_SMX                       ( 4*32+ 6) /* Safer mode */
++#define X86_FEATURE_SMX                       ( 4*32+ 6) /* Safer Mode eXtensions */
+ #define X86_FEATURE_EST                       ( 4*32+ 7) /* Enhanced SpeedStep */
+ #define X86_FEATURE_TM2                       ( 4*32+ 8) /* Thermal Monitor 2 */
+ #define X86_FEATURE_SSSE3             ( 4*32+ 9) /* Supplemental SSE-3 */
+ #define X86_FEATURE_CID                       ( 4*32+10) /* Context ID */
+ #define X86_FEATURE_SDBG              ( 4*32+11) /* Silicon Debug */
+ #define X86_FEATURE_FMA                       ( 4*32+12) /* Fused multiply-add */
+-#define X86_FEATURE_CX16              ( 4*32+13) /* CMPXCHG16B */
++#define X86_FEATURE_CX16              ( 4*32+13) /* CMPXCHG16B instruction */
+ #define X86_FEATURE_XTPR              ( 4*32+14) /* Send Task Priority Messages */
+-#define X86_FEATURE_PDCM              ( 4*32+15) /* Performance Capabilities */
++#define X86_FEATURE_PDCM              ( 4*32+15) /* Perf/Debug Capabilities MSR */
+ #define X86_FEATURE_PCID              ( 4*32+17) /* Process Context Identifiers */
+ #define X86_FEATURE_DCA                       ( 4*32+18) /* Direct Cache Access */
+ #define X86_FEATURE_XMM4_1            ( 4*32+19) /* "sse4_1" SSE-4.1 */
+ #define X86_FEATURE_XMM4_2            ( 4*32+20) /* "sse4_2" SSE-4.2 */
+-#define X86_FEATURE_X2APIC            ( 4*32+21) /* x2APIC */
++#define X86_FEATURE_X2APIC            ( 4*32+21) /* X2APIC */
+ #define X86_FEATURE_MOVBE             ( 4*32+22) /* MOVBE instruction */
+ #define X86_FEATURE_POPCNT            ( 4*32+23) /* POPCNT instruction */
+-#define X86_FEATURE_TSC_DEADLINE_TIMER        ( 4*32+24) /* Tsc deadline timer */
++#define X86_FEATURE_TSC_DEADLINE_TIMER        ( 4*32+24) /* TSC deadline timer */
+ #define X86_FEATURE_AES                       ( 4*32+25) /* AES instructions */
+-#define X86_FEATURE_XSAVE             ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+-#define X86_FEATURE_OSXSAVE           ( 4*32+27) /* "" XSAVE enabled in the OS */
++#define X86_FEATURE_XSAVE             ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
++#define X86_FEATURE_OSXSAVE           ( 4*32+27) /* "" XSAVE instruction enabled in the OS */
+ #define X86_FEATURE_AVX                       ( 4*32+28) /* Advanced Vector Extensions */
+-#define X86_FEATURE_F16C              ( 4*32+29) /* 16-bit fp conversions */
+-#define X86_FEATURE_RDRAND            ( 4*32+30) /* The RDRAND instruction */
++#define X86_FEATURE_F16C              ( 4*32+29) /* 16-bit FP conversions */
++#define X86_FEATURE_RDRAND            ( 4*32+30) /* RDRAND instruction */
+ #define X86_FEATURE_HYPERVISOR                ( 4*32+31) /* Running on a hypervisor */
+ 
+ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+@@ -157,10 +155,10 @@
+ #define X86_FEATURE_PMM                       ( 5*32+12) /* PadLock Montgomery Multiplier */
+ #define X86_FEATURE_PMM_EN            ( 5*32+13) /* PMM enabled */
+ 
+-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
++/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
+ #define X86_FEATURE_LAHF_LM           ( 6*32+ 0) /* LAHF/SAHF in long mode */
+ #define X86_FEATURE_CMP_LEGACY                ( 6*32+ 1) /* If yes HyperThreading not valid */
+-#define X86_FEATURE_SVM                       ( 6*32+ 2) /* Secure virtual machine */
++#define X86_FEATURE_SVM                       ( 6*32+ 2) /* Secure Virtual Machine */
+ #define X86_FEATURE_EXTAPIC           ( 6*32+ 3) /* Extended APIC space */
+ #define X86_FEATURE_CR8_LEGACY                ( 6*32+ 4) /* CR8 in 32-bit mode */
+ #define X86_FEATURE_ABM                       ( 6*32+ 5) /* Advanced bit manipulation */
+@@ -174,16 +172,16 @@
+ #define X86_FEATURE_WDT                       ( 6*32+13) /* Watchdog timer */
+ #define X86_FEATURE_LWP                       ( 6*32+15) /* Light Weight Profiling */
+ #define X86_FEATURE_FMA4              ( 6*32+16) /* 4 operands MAC instructions */
+-#define X86_FEATURE_TCE                       ( 6*32+17) /* translation cache extension */
++#define X86_FEATURE_TCE                       ( 6*32+17) /* Translation Cache Extension */
+ #define X86_FEATURE_NODEID_MSR                ( 6*32+19) /* NodeId MSR */
+-#define X86_FEATURE_TBM                       ( 6*32+21) /* trailing bit manipulations */
+-#define X86_FEATURE_TOPOEXT           ( 6*32+22) /* topology extensions CPUID leafs */
+-#define X86_FEATURE_PERFCTR_CORE      ( 6*32+23) /* core performance counter extensions */
++#define X86_FEATURE_TBM                       ( 6*32+21) /* Trailing Bit Manipulations */
++#define X86_FEATURE_TOPOEXT           ( 6*32+22) /* Topology extensions CPUID leafs */
++#define X86_FEATURE_PERFCTR_CORE      ( 6*32+23) /* Core performance counter extensions */
+ #define X86_FEATURE_PERFCTR_NB                ( 6*32+24) /* NB performance counter extensions */
+-#define X86_FEATURE_BPEXT             (6*32+26) /* data breakpoint extension */
+-#define X86_FEATURE_PTSC              ( 6*32+27) /* performance time-stamp counter */
++#define X86_FEATURE_BPEXT             ( 6*32+26) /* Data breakpoint extension */
++#define X86_FEATURE_PTSC              ( 6*32+27) /* Performance time-stamp counter */
+ #define X86_FEATURE_PERFCTR_L2                ( 6*32+28) /* Last Level Cache performance counter extensions */
+-#define X86_FEATURE_MWAITX            ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
++#define X86_FEATURE_MWAITX            ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
+ 
+ /*
+  * Auxiliary flags: Linux defined - For features scattered in various
+@@ -191,7 +189,7 @@
+  *
+  * Reuse free bits when adding new feature flags!
+  */
+-#define X86_FEATURE_RING3MWAIT                ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
++#define X86_FEATURE_RING3MWAIT                ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
+ #define X86_FEATURE_CPUID_FAULT               ( 7*32+ 1) /* Intel CPUID faulting */
+ #define X86_FEATURE_CPB                       ( 7*32+ 2) /* AMD Core Performance Boost */
+ #define X86_FEATURE_EPB                       ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+@@ -205,8 +203,8 @@
+ 
+ #define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
+ #define X86_FEATURE_INTEL_PT          ( 7*32+15) /* Intel Processor Trace */
+-#define X86_FEATURE_AVX512_4VNNIW     (7*32+16) /* AVX-512 Neural Network Instructions */
+-#define X86_FEATURE_AVX512_4FMAPS     (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
++#define X86_FEATURE_AVX512_4VNNIW     ( 7*32+16) /* AVX-512 Neural Network Instructions */
++#define X86_FEATURE_AVX512_4FMAPS     ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+ 
+ #define X86_FEATURE_MBA                       ( 7*32+18) /* Memory Bandwidth Allocation */
+ 
+@@ -217,19 +215,19 @@
+ #define X86_FEATURE_EPT                       ( 8*32+ 3) /* Intel Extended Page Table */
+ #define X86_FEATURE_VPID              ( 8*32+ 4) /* Intel Virtual Processor ID */
+ 
+-#define X86_FEATURE_VMMCALL           ( 8*32+15) /* Prefer vmmcall to vmcall */
++#define X86_FEATURE_VMMCALL           ( 8*32+15) /* Prefer VMMCALL to VMCALL */
+ #define X86_FEATURE_XENPV             ( 8*32+16) /* "" Xen paravirtual guest */
+ 
+ 
+-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
+-#define X86_FEATURE_FSGSBASE          ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+-#define X86_FEATURE_TSC_ADJUST                ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
++/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
++#define X86_FEATURE_FSGSBASE          ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
++#define X86_FEATURE_TSC_ADJUST                ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
+ #define X86_FEATURE_BMI1              ( 9*32+ 3) /* 1st group bit manipulation extensions */
+ #define X86_FEATURE_HLE                       ( 9*32+ 4) /* Hardware Lock Elision */
+ #define X86_FEATURE_AVX2              ( 9*32+ 5) /* AVX2 instructions */
+ #define X86_FEATURE_SMEP              ( 9*32+ 7) /* Supervisor Mode Execution Protection */
+ #define X86_FEATURE_BMI2              ( 9*32+ 8) /* 2nd group bit manipulation extensions */
+-#define X86_FEATURE_ERMS              ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
++#define X86_FEATURE_ERMS              ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
+ #define X86_FEATURE_INVPCID           ( 9*32+10) /* Invalidate Processor Context ID */
+ #define X86_FEATURE_RTM                       ( 9*32+11) /* Restricted Transactional Memory */
+ #define X86_FEATURE_CQM                       ( 9*32+12) /* Cache QoS Monitoring */
+@@ -237,8 +235,8 @@
+ #define X86_FEATURE_RDT_A             ( 9*32+15) /* Resource Director Technology Allocation */
+ #define X86_FEATURE_AVX512F           ( 9*32+16) /* AVX-512 Foundation */
+ #define X86_FEATURE_AVX512DQ          ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
+-#define X86_FEATURE_RDSEED            ( 9*32+18) /* The RDSEED instruction */
+-#define X86_FEATURE_ADX                       ( 9*32+19) /* The ADCX and ADOX instructions */
++#define X86_FEATURE_RDSEED            ( 9*32+18) /* RDSEED instruction */
++#define X86_FEATURE_ADX                       ( 9*32+19) /* ADCX and ADOX instructions */
+ #define X86_FEATURE_SMAP              ( 9*32+20) /* Supervisor Mode Access Prevention */
+ #define X86_FEATURE_AVX512IFMA                ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
+ #define X86_FEATURE_CLFLUSHOPT                ( 9*32+23) /* CLFLUSHOPT instruction */
+@@ -250,25 +248,25 @@
+ #define X86_FEATURE_AVX512BW          ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
+ #define X86_FEATURE_AVX512VL          ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+ 
+-/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
+-#define X86_FEATURE_XSAVEOPT          (10*32+ 0) /* XSAVEOPT */
+-#define X86_FEATURE_XSAVEC            (10*32+ 1) /* XSAVEC */
+-#define X86_FEATURE_XGETBV1           (10*32+ 2) /* XGETBV with ECX = 1 */
+-#define X86_FEATURE_XSAVES            (10*32+ 3) /* XSAVES/XRSTORS */
++/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
++#define X86_FEATURE_XSAVEOPT          (10*32+ 0) /* XSAVEOPT instruction */
++#define X86_FEATURE_XSAVEC            (10*32+ 1) /* XSAVEC instruction */
++#define X86_FEATURE_XGETBV1           (10*32+ 2) /* XGETBV with ECX = 1 instruction */
++#define X86_FEATURE_XSAVES            (10*32+ 3) /* XSAVES/XRSTORS instructions */
+ 
+-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
++/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
+ #define X86_FEATURE_CQM_LLC           (11*32+ 1) /* LLC QoS if 1 */
+ 
+-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+-#define X86_FEATURE_CQM_OCCUP_LLC     (12*32+ 0) /* LLC occupancy monitoring if 1 */
++/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
++#define X86_FEATURE_CQM_OCCUP_LLC     (12*32+ 0) /* LLC occupancy monitoring */
+ #define X86_FEATURE_CQM_MBM_TOTAL     (12*32+ 1) /* LLC Total MBM monitoring */
+ #define X86_FEATURE_CQM_MBM_LOCAL     (12*32+ 2) /* LLC Local MBM monitoring */
+ 
+-/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+-#define X86_FEATURE_CLZERO            (13*32+0) /* CLZERO instruction */
+-#define X86_FEATURE_IRPERF            (13*32+1) /* Instructions Retired Count */
++/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
++#define X86_FEATURE_CLZERO            (13*32+ 0) /* CLZERO instruction */
++#define X86_FEATURE_IRPERF            (13*32+ 1) /* Instructions Retired Count */
+ 
+-/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
++/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+ #define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
+ #define X86_FEATURE_IDA                       (14*32+ 1) /* Intel Dynamic Acceleration */
+ #define X86_FEATURE_ARAT              (14*32+ 2) /* Always Running APIC Timer */
+@@ -280,7 +278,7 @@
+ #define X86_FEATURE_HWP_EPP           (14*32+10) /* HWP Energy Perf. Preference */
+ #define X86_FEATURE_HWP_PKG_REQ               (14*32+11) /* HWP Package Level Request */
+ 
+-/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
++/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
+ #define X86_FEATURE_NPT                       (15*32+ 0) /* Nested Page Table support */
+ #define X86_FEATURE_LBRV              (15*32+ 1) /* LBR Virtualization support */
+ #define X86_FEATURE_SVML              (15*32+ 2) /* "svm_lock" SVM locking MSR */
+@@ -295,24 +293,24 @@
+ #define X86_FEATURE_V_VMSAVE_VMLOAD   (15*32+15) /* Virtual VMSAVE VMLOAD */
+ #define X86_FEATURE_VGIF              (15*32+16) /* Virtual GIF */
+ 
+-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
++/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
+ #define X86_FEATURE_AVX512VBMI                (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
+ #define X86_FEATURE_PKU                       (16*32+ 3) /* Protection Keys for Userspace */
+ #define X86_FEATURE_OSPKE             (16*32+ 4) /* OS Protection Keys Enable */
+ #define X86_FEATURE_AVX512_VBMI2      (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+ #define X86_FEATURE_GFNI              (16*32+ 8) /* Galois Field New Instructions */
+ #define X86_FEATURE_VAES              (16*32+ 9) /* Vector AES */
+-#define X86_FEATURE_VPCLMULQDQ                (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
+-#define X86_FEATURE_AVX512_VNNI               (16*32+ 11) /* Vector Neural Network Instructions */
+-#define X86_FEATURE_AVX512_BITALG     (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
++#define X86_FEATURE_VPCLMULQDQ                (16*32+10) /* Carry-Less Multiplication Double Quadword */
++#define X86_FEATURE_AVX512_VNNI               (16*32+11) /* Vector Neural Network Instructions */
++#define X86_FEATURE_AVX512_BITALG     (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+ #define X86_FEATURE_AVX512_VPOPCNTDQ  (16*32+14) /* POPCNT for vectors of DW/QW */
+ #define X86_FEATURE_LA57              (16*32+16) /* 5-level page tables */
+ #define X86_FEATURE_RDPID             (16*32+22) /* RDPID instruction */
+ 
+-/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
+-#define X86_FEATURE_OVERFLOW_RECOV    (17*32+0) /* MCA overflow recovery support */
+-#define X86_FEATURE_SUCCOR            (17*32+1) /* Uncorrectable error containment and recovery */
+-#define X86_FEATURE_SMCA              (17*32+3) /* Scalable MCA */
++/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
++#define X86_FEATURE_OVERFLOW_RECOV    (17*32+ 0) /* MCA overflow recovery support */
++#define X86_FEATURE_SUCCOR            (17*32+ 1) /* Uncorrectable error containment and recovery */
++#define X86_FEATURE_SMCA              (17*32+ 3) /* Scalable MCA */
+ 
+ /*
+  * BUG word(s)
+@@ -339,4 +337,5 @@
+ #define X86_BUG_SWAPGS_FENCE          X86_BUG(11) /* SWAPGS without input dep on GS */
+ #define X86_BUG_MONITOR                       X86_BUG(12) /* IPI required to wake up remote CPU */
+ #define X86_BUG_AMD_E400              X86_BUG(13) /* CPU is among the affected by Erratum 400 */
++
+ #endif /* _ASM_X86_CPUFEATURES_H */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0115-selftests-x86-ldt_gdt-Robustify-against-set_thread_a.patch b/patches/kernel/0115-selftests-x86-ldt_gdt-Robustify-against-set_thread_a.patch

deleted file mode 100644 (file)

index f3cb8f9..0000000
--- a/patches/kernel/0115-selftests-x86-ldt_gdt-Robustify-against-set_thread_a.patch
+++ /dev/null
@@ -1,64 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sat, 4 Nov 2017 04:19:49 -0700
-Subject: [PATCH] selftests/x86/ldt_gdt: Robustify against set_thread_area()
- and LAR oddities
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Bits 19:16 of LAR's result are undefined, and some upcoming
-improvements to the test case seem to trigger this.  Mask off those
-bits to avoid spurious failures.
-
-commit 5b781c7e317f ("x86/tls: Forcibly set the accessed bit in TLS
-segments") adds a valid case in which LAR's output doesn't quite
-agree with set_thread_area()'s input.  This isn't triggered in the
-test as is, but it will be if we start calling set_thread_area()
-with the accessed bit clear.  Work around this discrepency.
-
-I've added a Fixes tag so that -stable can pick this up if neccesary.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Fixes: 5b781c7e317f ("x86/tls: Forcibly set the accessed bit in TLS segments")
-Link: http://lkml.kernel.org/r/b82f3f89c034b53580970ac865139fd8863f44e2.1509794321.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit d60ad744c9741586010d4bea286f09a063a90fbd)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d4c2ffcf3efe0d9610919fd48f5a1a5e38c28c07)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- tools/testing/selftests/x86/ldt_gdt.c | 10 +++++++++-
- 1 file changed, 9 insertions(+), 1 deletion(-)
-
-diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
-index b9a22f18566a..b2c54f4673f2 100644
---- a/tools/testing/selftests/x86/ldt_gdt.c
-+++ b/tools/testing/selftests/x86/ldt_gdt.c
-@@ -114,7 +114,15 @@ static void check_valid_segment(uint16_t index, int ldt,
-               return;
-       }
- 
--      if (ar != expected_ar) {
-+      /* The SDM says "bits 19:16 are undefined".  Thanks. */
-+      ar &= ~0xF0000;
-+
-+      /*
-+       * NB: Different Linux versions do different things with the
-+       * accessed bit in set_thread_area().
-+       */
-+      if (ar != expected_ar &&
-+          (ldt || ar != (expected_ar | AR_ACCESSED))) {
-               printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
-                      (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
-               nerrs++;
--- 
-2.14.2
-
diff --git a/patches/kernel/0115-selftests-x86-protection_keys-Fix-syscall-NR-redefin.patch b/patches/kernel/0115-selftests-x86-protection_keys-Fix-syscall-NR-redefin.patch

new file mode 100644 (file)

index 0000000..810e63a
--- /dev/null
+++ b/patches/kernel/0115-selftests-x86-protection_keys-Fix-syscall-NR-redefin.patch
@@ -0,0 +1,77 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sat, 4 Nov 2017 04:19:48 -0700
+Subject: [PATCH] selftests/x86/protection_keys: Fix syscall NR redefinition
+ warnings
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+On new enough glibc, the pkey syscalls numbers are available.  Check
+first before defining them to avoid warnings like:
+
+protection_keys.c:198:0: warning: "SYS_pkey_alloc" redefined
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: http://lkml.kernel.org/r/1fbef53a9e6befb7165ff855fc1a7d4788a191d6.1509794321.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 693cb5580fdb026922363aa103add64b3ecd572e)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 64c8ec4beb84ca8b0ff3250a8b6044d06be6315b)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ tools/testing/selftests/x86/protection_keys.c | 24 ++++++++++++++++++------
+ 1 file changed, 18 insertions(+), 6 deletions(-)
+
+diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
+index 3237bc010e1c..3c54d5c40952 100644
+--- a/tools/testing/selftests/x86/protection_keys.c
++++ b/tools/testing/selftests/x86/protection_keys.c
+@@ -188,17 +188,29 @@ void lots_o_noops_around_write(int *write_to_me)
+ #define u64 uint64_t
+ 
+ #ifdef __i386__
+-#define SYS_mprotect_key 380
+-#define SYS_pkey_alloc         381
+-#define SYS_pkey_free  382
++
++#ifndef SYS_mprotect_key
++# define SYS_mprotect_key 380
++#endif
++#ifndef SYS_pkey_alloc
++# define SYS_pkey_alloc        381
++# define SYS_pkey_free         382
++#endif
+ #define REG_IP_IDX REG_EIP
+ #define si_pkey_offset 0x14
++
+ #else
+-#define SYS_mprotect_key 329
+-#define SYS_pkey_alloc         330
+-#define SYS_pkey_free  331
++
++#ifndef SYS_mprotect_key
++# define SYS_mprotect_key 329
++#endif
++#ifndef SYS_pkey_alloc
++# define SYS_pkey_alloc        330
++# define SYS_pkey_free         331
++#endif
+ #define REG_IP_IDX REG_RIP
+ #define si_pkey_offset 0x20
++
+ #endif
+ 
+ void dump_mem(void *dumpme, int len_bytes)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0116-selftests-x86-ldt_gdt-Add-infrastructure-to-test-set.patch b/patches/kernel/0116-selftests-x86-ldt_gdt-Add-infrastructure-to-test-set.patch

deleted file mode 100644 (file)

index 62ea6e9..0000000
--- a/patches/kernel/0116-selftests-x86-ldt_gdt-Add-infrastructure-to-test-set.patch
+++ /dev/null
@@ -1,114 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sat, 4 Nov 2017 04:19:50 -0700
-Subject: [PATCH] selftests/x86/ldt_gdt: Add infrastructure to test
- set_thread_area()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Much of the test design could apply to set_thread_area() (i.e. GDT),
-not just modify_ldt().  Add set_thread_area() to the
-install_valid_mode() helper.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/02c23f8fba5547007f741dc24c3926e5284ede02.1509794321.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit d744dcad39094c9187075e274d1cdef79c57c8b5)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d6ae7ac5849304e520538a6ce3111f372f809596)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- tools/testing/selftests/x86/ldt_gdt.c | 53 ++++++++++++++++++++++++-----------
- 1 file changed, 37 insertions(+), 16 deletions(-)
-
-diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
-index b2c54f4673f2..337f217d0ae9 100644
---- a/tools/testing/selftests/x86/ldt_gdt.c
-+++ b/tools/testing/selftests/x86/ldt_gdt.c
-@@ -136,30 +136,51 @@ static void check_valid_segment(uint16_t index, int ldt,
-       }
- }
- 
--static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
--                             bool oldmode)
-+static bool install_valid_mode(const struct user_desc *d, uint32_t ar,
-+                             bool oldmode, bool ldt)
- {
--      int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
--                        desc, sizeof(*desc));
--      if (ret < -1)
--              errno = -ret;
-+      struct user_desc desc = *d;
-+      int ret;
-+
-+      if (!ldt) {
-+#ifndef __i386__
-+              /* No point testing set_thread_area in a 64-bit build */
-+              return false;
-+#endif
-+              if (!gdt_entry_num)
-+                      return false;
-+              desc.entry_number = gdt_entry_num;
-+
-+              ret = syscall(SYS_set_thread_area, &desc);
-+      } else {
-+              ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
-+                            &desc, sizeof(desc));
-+
-+              if (ret < -1)
-+                      errno = -ret;
-+
-+              if (ret != 0 && errno == ENOSYS) {
-+                      printf("[OK]\tmodify_ldt returned -ENOSYS\n");
-+                      return false;
-+              }
-+      }
-+
-       if (ret == 0) {
--              uint32_t limit = desc->limit;
--              if (desc->limit_in_pages)
-+              uint32_t limit = desc.limit;
-+              if (desc.limit_in_pages)
-                       limit = (limit << 12) + 4095;
--              check_valid_segment(desc->entry_number, 1, ar, limit, true);
-+              check_valid_segment(desc.entry_number, ldt, ar, limit, true);
-               return true;
--      } else if (errno == ENOSYS) {
--              printf("[OK]\tmodify_ldt returned -ENOSYS\n");
--              return false;
-       } else {
--              if (desc->seg_32bit) {
--                      printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
-+              if (desc.seg_32bit) {
-+                      printf("[FAIL]\tUnexpected %s failure %d\n",
-+                             ldt ? "modify_ldt" : "set_thread_area",
-                              errno);
-                       nerrs++;
-                       return false;
-               } else {
--                      printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
-+                      printf("[OK]\t%s rejected 16 bit segment\n",
-+                             ldt ? "modify_ldt" : "set_thread_area");
-                       return false;
-               }
-       }
-@@ -167,7 +188,7 @@ static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
- 
- static bool install_valid(const struct user_desc *desc, uint32_t ar)
- {
--      return install_valid_mode(desc, ar, false);
-+      return install_valid_mode(desc, ar, false, true);
- }
- 
- static void install_invalid(const struct user_desc *desc, bool oldmode)
--- 
-2.14.2
-
diff --git a/patches/kernel/0116-selftests-x86-ldt_gdt-Robustify-against-set_thread_a.patch b/patches/kernel/0116-selftests-x86-ldt_gdt-Robustify-against-set_thread_a.patch

new file mode 100644 (file)

index 0000000..f3cb8f9
--- /dev/null
+++ b/patches/kernel/0116-selftests-x86-ldt_gdt-Robustify-against-set_thread_a.patch
@@ -0,0 +1,64 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sat, 4 Nov 2017 04:19:49 -0700
+Subject: [PATCH] selftests/x86/ldt_gdt: Robustify against set_thread_area()
+ and LAR oddities
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Bits 19:16 of LAR's result are undefined, and some upcoming
+improvements to the test case seem to trigger this.  Mask off those
+bits to avoid spurious failures.
+
+commit 5b781c7e317f ("x86/tls: Forcibly set the accessed bit in TLS
+segments") adds a valid case in which LAR's output doesn't quite
+agree with set_thread_area()'s input.  This isn't triggered in the
+test as is, but it will be if we start calling set_thread_area()
+with the accessed bit clear.  Work around this discrepancy.
+
+I've added a Fixes tag so that -stable can pick this up if necessary.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Fixes: 5b781c7e317f ("x86/tls: Forcibly set the accessed bit in TLS segments")
+Link: http://lkml.kernel.org/r/b82f3f89c034b53580970ac865139fd8863f44e2.1509794321.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit d60ad744c9741586010d4bea286f09a063a90fbd)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d4c2ffcf3efe0d9610919fd48f5a1a5e38c28c07)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ tools/testing/selftests/x86/ldt_gdt.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
+index b9a22f18566a..b2c54f4673f2 100644
+--- a/tools/testing/selftests/x86/ldt_gdt.c
++++ b/tools/testing/selftests/x86/ldt_gdt.c
+@@ -114,7 +114,15 @@ static void check_valid_segment(uint16_t index, int ldt,
+               return;
+       }
+ 
+-      if (ar != expected_ar) {
++      /* The SDM says "bits 19:16 are undefined".  Thanks. */
++      ar &= ~0xF0000;
++
++      /*
++       * NB: Different Linux versions do different things with the
++       * accessed bit in set_thread_area().
++       */
++      if (ar != expected_ar &&
++          (ldt || ar != (expected_ar | AR_ACCESSED))) {
+               printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
+                      (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
+               nerrs++;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0117-selftests-x86-ldt_gdt-Add-infrastructure-to-test-set.patch b/patches/kernel/0117-selftests-x86-ldt_gdt-Add-infrastructure-to-test-set.patch

new file mode 100644 (file)

index 0000000..62ea6e9
--- /dev/null
+++ b/patches/kernel/0117-selftests-x86-ldt_gdt-Add-infrastructure-to-test-set.patch
@@ -0,0 +1,114 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sat, 4 Nov 2017 04:19:50 -0700
+Subject: [PATCH] selftests/x86/ldt_gdt: Add infrastructure to test
+ set_thread_area()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Much of the test design could apply to set_thread_area() (i.e. GDT),
+not just modify_ldt().  Add set_thread_area() to the
+install_valid_mode() helper.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/02c23f8fba5547007f741dc24c3926e5284ede02.1509794321.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit d744dcad39094c9187075e274d1cdef79c57c8b5)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d6ae7ac5849304e520538a6ce3111f372f809596)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ tools/testing/selftests/x86/ldt_gdt.c | 53 ++++++++++++++++++++++++-----------
+ 1 file changed, 37 insertions(+), 16 deletions(-)
+
+diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
+index b2c54f4673f2..337f217d0ae9 100644
+--- a/tools/testing/selftests/x86/ldt_gdt.c
++++ b/tools/testing/selftests/x86/ldt_gdt.c
+@@ -136,30 +136,51 @@ static void check_valid_segment(uint16_t index, int ldt,
+       }
+ }
+ 
+-static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
+-                             bool oldmode)
++static bool install_valid_mode(const struct user_desc *d, uint32_t ar,
++                             bool oldmode, bool ldt)
+ {
+-      int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
+-                        desc, sizeof(*desc));
+-      if (ret < -1)
+-              errno = -ret;
++      struct user_desc desc = *d;
++      int ret;
++
++      if (!ldt) {
++#ifndef __i386__
++              /* No point testing set_thread_area in a 64-bit build */
++              return false;
++#endif
++              if (!gdt_entry_num)
++                      return false;
++              desc.entry_number = gdt_entry_num;
++
++              ret = syscall(SYS_set_thread_area, &desc);
++      } else {
++              ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
++                            &desc, sizeof(desc));
++
++              if (ret < -1)
++                      errno = -ret;
++
++              if (ret != 0 && errno == ENOSYS) {
++                      printf("[OK]\tmodify_ldt returned -ENOSYS\n");
++                      return false;
++              }
++      }
++
+       if (ret == 0) {
+-              uint32_t limit = desc->limit;
+-              if (desc->limit_in_pages)
++              uint32_t limit = desc.limit;
++              if (desc.limit_in_pages)
+                       limit = (limit << 12) + 4095;
+-              check_valid_segment(desc->entry_number, 1, ar, limit, true);
++              check_valid_segment(desc.entry_number, ldt, ar, limit, true);
+               return true;
+-      } else if (errno == ENOSYS) {
+-              printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+-              return false;
+       } else {
+-              if (desc->seg_32bit) {
+-                      printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
++              if (desc.seg_32bit) {
++                      printf("[FAIL]\tUnexpected %s failure %d\n",
++                             ldt ? "modify_ldt" : "set_thread_area",
+                              errno);
+                       nerrs++;
+                       return false;
+               } else {
+-                      printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
++                      printf("[OK]\t%s rejected 16 bit segment\n",
++                             ldt ? "modify_ldt" : "set_thread_area");
+                       return false;
+               }
+       }
+@@ -167,7 +188,7 @@ static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
+ 
+ static bool install_valid(const struct user_desc *desc, uint32_t ar)
+ {
+-      return install_valid_mode(desc, ar, false);
++      return install_valid_mode(desc, ar, false, true);
+ }
+ 
+ static void install_invalid(const struct user_desc *desc, bool oldmode)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0117-selftests-x86-ldt_gdt-Run-most-existing-LDT-test-cas.patch b/patches/kernel/0117-selftests-x86-ldt_gdt-Run-most-existing-LDT-test-cas.patch

deleted file mode 100644 (file)

index a1d34a3..0000000
--- a/patches/kernel/0117-selftests-x86-ldt_gdt-Run-most-existing-LDT-test-cas.patch
+++ /dev/null
@@ -1,54 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sat, 4 Nov 2017 04:19:51 -0700
-Subject: [PATCH] selftests/x86/ldt_gdt: Run most existing LDT test cases
- against the GDT as well
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Now that the main test infrastructure supports the GDT, run tests
-that will pass the kernel's GDT permission tests against the GDT.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/686a1eda63414da38fcecc2412db8dba1ae40581.1509794321.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit adedf2893c192dd09b1cc2f2dcfdd7cad99ec49d)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit f05c092307d8479094d83d4337d66e6e86e730a9)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- tools/testing/selftests/x86/ldt_gdt.c | 10 +++++++++-
- 1 file changed, 9 insertions(+), 1 deletion(-)
-
-diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
-index 337f217d0ae9..05d0d6f49c2c 100644
---- a/tools/testing/selftests/x86/ldt_gdt.c
-+++ b/tools/testing/selftests/x86/ldt_gdt.c
-@@ -188,7 +188,15 @@ static bool install_valid_mode(const struct user_desc *d, uint32_t ar,
- 
- static bool install_valid(const struct user_desc *desc, uint32_t ar)
- {
--      return install_valid_mode(desc, ar, false, true);
-+      bool ret = install_valid_mode(desc, ar, false, true);
-+
-+      if (desc->contents <= 1 && desc->seg_32bit &&
-+          !desc->seg_not_present) {
-+              /* Should work in the GDT, too. */
-+              install_valid_mode(desc, ar, false, false);
-+      }
-+
-+      return ret;
- }
- 
- static void install_invalid(const struct user_desc *desc, bool oldmode)
--- 
-2.14.2
-
diff --git a/patches/kernel/0118-selftests-x86-ldt_gdt-Run-most-existing-LDT-test-cas.patch b/patches/kernel/0118-selftests-x86-ldt_gdt-Run-most-existing-LDT-test-cas.patch

new file mode 100644 (file)

index 0000000..a1d34a3
--- /dev/null
+++ b/patches/kernel/0118-selftests-x86-ldt_gdt-Run-most-existing-LDT-test-cas.patch
@@ -0,0 +1,54 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sat, 4 Nov 2017 04:19:51 -0700
+Subject: [PATCH] selftests/x86/ldt_gdt: Run most existing LDT test cases
+ against the GDT as well
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Now that the main test infrastructure supports the GDT, run tests
+that will pass the kernel's GDT permission tests against the GDT.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/686a1eda63414da38fcecc2412db8dba1ae40581.1509794321.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit adedf2893c192dd09b1cc2f2dcfdd7cad99ec49d)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit f05c092307d8479094d83d4337d66e6e86e730a9)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ tools/testing/selftests/x86/ldt_gdt.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
+index 337f217d0ae9..05d0d6f49c2c 100644
+--- a/tools/testing/selftests/x86/ldt_gdt.c
++++ b/tools/testing/selftests/x86/ldt_gdt.c
+@@ -188,7 +188,15 @@ static bool install_valid_mode(const struct user_desc *d, uint32_t ar,
+ 
+ static bool install_valid(const struct user_desc *desc, uint32_t ar)
+ {
+-      return install_valid_mode(desc, ar, false, true);
++      bool ret = install_valid_mode(desc, ar, false, true);
++
++      if (desc->contents <= 1 && desc->seg_32bit &&
++          !desc->seg_not_present) {
++              /* Should work in the GDT, too. */
++              install_valid_mode(desc, ar, false, false);
++      }
++
++      return ret;
+ }
+ 
+ static void install_invalid(const struct user_desc *desc, bool oldmode)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0118-selftests-x86-ldt_get-Add-a-few-additional-tests-for.patch b/patches/kernel/0118-selftests-x86-ldt_get-Add-a-few-additional-tests-for.patch

deleted file mode 100644 (file)

index ebb340b..0000000
--- a/patches/kernel/0118-selftests-x86-ldt_get-Add-a-few-additional-tests-for.patch
+++ /dev/null
@@ -1,66 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sat, 4 Nov 2017 04:19:52 -0700
-Subject: [PATCH] selftests/x86/ldt_get: Add a few additional tests for limits
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-We weren't testing the .limit and .limit_in_pages fields very well.
-Add more tests.
-
-This addition seems to trigger the "bits 16:19 are undefined" issue
-that was fixed in an earlier patch.  I think that, at least on my
-CPU, the high nibble of the limit ends in LAR bits 16:19.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/5601c15ea9b3113d288953fd2838b18bedf6bc67.1509794321.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit fec8f5ae1715a01c72ad52cb2ecd8aacaf142302)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit a1cdabf77d7ed9ba02697ad8beb04adf46a7c7b8)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- tools/testing/selftests/x86/ldt_gdt.c | 17 ++++++++++++++++-
- 1 file changed, 16 insertions(+), 1 deletion(-)
-
-diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
-index 05d0d6f49c2c..8e290c9b2c3f 100644
---- a/tools/testing/selftests/x86/ldt_gdt.c
-+++ b/tools/testing/selftests/x86/ldt_gdt.c
-@@ -403,9 +403,24 @@ static void do_simple_tests(void)
-       install_invalid(&desc, false);
- 
-       desc.seg_not_present = 0;
--      desc.read_exec_only = 0;
-       desc.seg_32bit = 1;
-+      desc.read_exec_only = 0;
-+      desc.limit = 0xfffff;
-+
-       install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB);
-+
-+      desc.limit_in_pages = 1;
-+
-+      install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB | AR_G);
-+      desc.read_exec_only = 1;
-+      install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P | AR_DB | AR_G);
-+      desc.contents = 1;
-+      desc.read_exec_only = 0;
-+      install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
-+      desc.read_exec_only = 1;
-+      install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
-+
-+      desc.limit = 0;
-       install_invalid(&desc, true);
- }
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0119-ACPI-APEI-Replace-ioremap_page_range-with-fixmap.patch b/patches/kernel/0119-ACPI-APEI-Replace-ioremap_page_range-with-fixmap.patch

deleted file mode 100644 (file)

index cb047d9..0000000
--- a/patches/kernel/0119-ACPI-APEI-Replace-ioremap_page_range-with-fixmap.patch
+++ /dev/null
@@ -1,190 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: James Morse <james.morse@arm.com>
-Date: Mon, 6 Nov 2017 18:44:24 +0000
-Subject: [PATCH] ACPI / APEI: Replace ioremap_page_range() with fixmap
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Replace ghes_io{re,un}map_pfn_{nmi,irq}()s use of ioremap_page_range()
-with __set_fixmap() as ioremap_page_range() may sleep to allocate a new
-level of page-table, even if its passed an existing final-address to
-use in the mapping.
-
-The GHES driver can only be enabled for architectures that select
-HAVE_ACPI_APEI: Add fixmap entries to both x86 and arm64.
-
-clear_fixmap() does the TLB invalidation in __set_fixmap() for arm64
-and __set_pte_vaddr() for x86. In each case its the same as the
-respective arch_apei_flush_tlb_one().
-
-Reported-by: Fengguang Wu <fengguang.wu@intel.com>
-Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
-Signed-off-by: James Morse <james.morse@arm.com>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Tested-by: Tyler Baicar <tbaicar@codeaurora.org>
-Tested-by: Toshi Kani <toshi.kani@hpe.com>
-[ For the arm64 bits: ]
-Acked-by: Will Deacon <will.deacon@arm.com>
-[ For the x86 bits: ]
-Acked-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-Cc: All applicable <stable@vger.kernel.org>
-(cherry picked from commit 4f89fa286f6729312e227e7c2d764e8e7b9d340e)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit eda363b23c1601f733cb1d7d66d1a4975c4c5d09)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/arm64/include/asm/fixmap.h |  7 +++++++
- arch/x86/include/asm/fixmap.h   |  6 ++++++
- drivers/acpi/apei/ghes.c        | 44 +++++++++++++----------------------------
- 3 files changed, 27 insertions(+), 30 deletions(-)
-
-diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
-index caf86be815ba..4052ec39e8db 100644
---- a/arch/arm64/include/asm/fixmap.h
-+++ b/arch/arm64/include/asm/fixmap.h
-@@ -51,6 +51,13 @@ enum fixed_addresses {
- 
-       FIX_EARLYCON_MEM_BASE,
-       FIX_TEXT_POKE0,
-+
-+#ifdef CONFIG_ACPI_APEI_GHES
-+      /* Used for GHES mapping from assorted contexts */
-+      FIX_APEI_GHES_IRQ,
-+      FIX_APEI_GHES_NMI,
-+#endif /* CONFIG_ACPI_APEI_GHES */
-+
-       __end_of_permanent_fixed_addresses,
- 
-       /*
-diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
-index b65155cc3760..81c2b11f50a6 100644
---- a/arch/x86/include/asm/fixmap.h
-+++ b/arch/x86/include/asm/fixmap.h
-@@ -104,6 +104,12 @@ enum fixed_addresses {
-       FIX_GDT_REMAP_BEGIN,
-       FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
- 
-+#ifdef CONFIG_ACPI_APEI_GHES
-+      /* Used for GHES mapping from assorted contexts */
-+      FIX_APEI_GHES_IRQ,
-+      FIX_APEI_GHES_NMI,
-+#endif
-+
-       __end_of_permanent_fixed_addresses,
- 
-       /*
-diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
-index 4827176f838d..f9f106e62e74 100644
---- a/drivers/acpi/apei/ghes.c
-+++ b/drivers/acpi/apei/ghes.c
-@@ -51,6 +51,7 @@
- #include <acpi/actbl1.h>
- #include <acpi/ghes.h>
- #include <acpi/apei.h>
-+#include <asm/fixmap.h>
- #include <asm/tlbflush.h>
- #include <ras/ras_event.h>
- 
-@@ -112,7 +113,7 @@ static DEFINE_MUTEX(ghes_list_mutex);
-  * Because the memory area used to transfer hardware error information
-  * from BIOS to Linux can be determined only in NMI, IRQ or timer
-  * handler, but general ioremap can not be used in atomic context, so
-- * a special version of atomic ioremap is implemented for that.
-+ * the fixmap is used instead.
-  */
- 
- /*
-@@ -126,8 +127,8 @@ static DEFINE_MUTEX(ghes_list_mutex);
- /* virtual memory area for atomic ioremap */
- static struct vm_struct *ghes_ioremap_area;
- /*
-- * These 2 spinlock is used to prevent atomic ioremap virtual memory
-- * area from being mapped simultaneously.
-+ * These 2 spinlocks are used to prevent the fixmap entries from being used
-+ * simultaneously.
-  */
- static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
- static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
-@@ -159,53 +160,36 @@ static void ghes_ioremap_exit(void)
- 
- static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
- {
--      unsigned long vaddr;
-       phys_addr_t paddr;
-       pgprot_t prot;
- 
--      vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
--
-       paddr = pfn << PAGE_SHIFT;
-       prot = arch_apei_get_mem_attribute(paddr);
--      ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
-+      __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot);
- 
--      return (void __iomem *)vaddr;
-+      return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI);
- }
- 
- static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
- {
--      unsigned long vaddr;
-       phys_addr_t paddr;
-       pgprot_t prot;
- 
--      vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
--
-       paddr = pfn << PAGE_SHIFT;
-       prot = arch_apei_get_mem_attribute(paddr);
-+      __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot);
- 
--      ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
--
--      return (void __iomem *)vaddr;
-+      return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ);
- }
- 
--static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
-+static void ghes_iounmap_nmi(void)
- {
--      unsigned long vaddr = (unsigned long __force)vaddr_ptr;
--      void *base = ghes_ioremap_area->addr;
--
--      BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
--      unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
--      arch_apei_flush_tlb_one(vaddr);
-+      clear_fixmap(FIX_APEI_GHES_NMI);
- }
- 
--static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
-+static void ghes_iounmap_irq(void)
- {
--      unsigned long vaddr = (unsigned long __force)vaddr_ptr;
--      void *base = ghes_ioremap_area->addr;
--
--      BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
--      unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
--      arch_apei_flush_tlb_one(vaddr);
-+      clear_fixmap(FIX_APEI_GHES_IRQ);
- }
- 
- static int ghes_estatus_pool_init(void)
-@@ -361,10 +345,10 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
-               paddr += trunk;
-               buffer += trunk;
-               if (in_nmi) {
--                      ghes_iounmap_nmi(vaddr);
-+                      ghes_iounmap_nmi();
-                       raw_spin_unlock(&ghes_ioremap_lock_nmi);
-               } else {
--                      ghes_iounmap_irq(vaddr);
-+                      ghes_iounmap_irq();
-                       spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
-               }
-       }
--- 
-2.14.2
-
diff --git a/patches/kernel/0119-selftests-x86-ldt_get-Add-a-few-additional-tests-for.patch b/patches/kernel/0119-selftests-x86-ldt_get-Add-a-few-additional-tests-for.patch

new file mode 100644 (file)

index 0000000..ebb340b
--- /dev/null
+++ b/patches/kernel/0119-selftests-x86-ldt_get-Add-a-few-additional-tests-for.patch
@@ -0,0 +1,66 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sat, 4 Nov 2017 04:19:52 -0700
+Subject: [PATCH] selftests/x86/ldt_get: Add a few additional tests for limits
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+We weren't testing the .limit and .limit_in_pages fields very well.
+Add more tests.
+
+This addition seems to trigger the "bits 16:19 are undefined" issue
+that was fixed in an earlier patch.  I think that, at least on my
+CPU, the high nibble of the limit ends in LAR bits 16:19.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/5601c15ea9b3113d288953fd2838b18bedf6bc67.1509794321.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit fec8f5ae1715a01c72ad52cb2ecd8aacaf142302)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit a1cdabf77d7ed9ba02697ad8beb04adf46a7c7b8)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ tools/testing/selftests/x86/ldt_gdt.c | 17 ++++++++++++++++-
+ 1 file changed, 16 insertions(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
+index 05d0d6f49c2c..8e290c9b2c3f 100644
+--- a/tools/testing/selftests/x86/ldt_gdt.c
++++ b/tools/testing/selftests/x86/ldt_gdt.c
+@@ -403,9 +403,24 @@ static void do_simple_tests(void)
+       install_invalid(&desc, false);
+ 
+       desc.seg_not_present = 0;
+-      desc.read_exec_only = 0;
+       desc.seg_32bit = 1;
++      desc.read_exec_only = 0;
++      desc.limit = 0xfffff;
++
+       install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB);
++
++      desc.limit_in_pages = 1;
++
++      install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB | AR_G);
++      desc.read_exec_only = 1;
++      install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P | AR_DB | AR_G);
++      desc.contents = 1;
++      desc.read_exec_only = 0;
++      install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
++      desc.read_exec_only = 1;
++      install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
++
++      desc.limit = 0;
+       install_invalid(&desc, true);
+ }
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0120-ACPI-APEI-Replace-ioremap_page_range-with-fixmap.patch b/patches/kernel/0120-ACPI-APEI-Replace-ioremap_page_range-with-fixmap.patch

new file mode 100644 (file)

index 0000000..cb047d9
--- /dev/null
+++ b/patches/kernel/0120-ACPI-APEI-Replace-ioremap_page_range-with-fixmap.patch
@@ -0,0 +1,190 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: James Morse <james.morse@arm.com>
+Date: Mon, 6 Nov 2017 18:44:24 +0000
+Subject: [PATCH] ACPI / APEI: Replace ioremap_page_range() with fixmap
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Replace ghes_io{re,un}map_pfn_{nmi,irq}()s use of ioremap_page_range()
+with __set_fixmap() as ioremap_page_range() may sleep to allocate a new
+level of page-table, even if its passed an existing final-address to
+use in the mapping.
+
+The GHES driver can only be enabled for architectures that select
+HAVE_ACPI_APEI: Add fixmap entries to both x86 and arm64.
+
+clear_fixmap() does the TLB invalidation in __set_fixmap() for arm64
+and __set_pte_vaddr() for x86. In each case its the same as the
+respective arch_apei_flush_tlb_one().
+
+Reported-by: Fengguang Wu <fengguang.wu@intel.com>
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: James Morse <james.morse@arm.com>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Tested-by: Tyler Baicar <tbaicar@codeaurora.org>
+Tested-by: Toshi Kani <toshi.kani@hpe.com>
+[ For the arm64 bits: ]
+Acked-by: Will Deacon <will.deacon@arm.com>
+[ For the x86 bits: ]
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: All applicable <stable@vger.kernel.org>
+(cherry picked from commit 4f89fa286f6729312e227e7c2d764e8e7b9d340e)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit eda363b23c1601f733cb1d7d66d1a4975c4c5d09)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/arm64/include/asm/fixmap.h |  7 +++++++
+ arch/x86/include/asm/fixmap.h   |  6 ++++++
+ drivers/acpi/apei/ghes.c        | 44 +++++++++++++----------------------------
+ 3 files changed, 27 insertions(+), 30 deletions(-)
+
+diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
+index caf86be815ba..4052ec39e8db 100644
+--- a/arch/arm64/include/asm/fixmap.h
++++ b/arch/arm64/include/asm/fixmap.h
+@@ -51,6 +51,13 @@ enum fixed_addresses {
+ 
+       FIX_EARLYCON_MEM_BASE,
+       FIX_TEXT_POKE0,
++
++#ifdef CONFIG_ACPI_APEI_GHES
++      /* Used for GHES mapping from assorted contexts */
++      FIX_APEI_GHES_IRQ,
++      FIX_APEI_GHES_NMI,
++#endif /* CONFIG_ACPI_APEI_GHES */
++
+       __end_of_permanent_fixed_addresses,
+ 
+       /*
+diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
+index b65155cc3760..81c2b11f50a6 100644
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -104,6 +104,12 @@ enum fixed_addresses {
+       FIX_GDT_REMAP_BEGIN,
+       FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
+ 
++#ifdef CONFIG_ACPI_APEI_GHES
++      /* Used for GHES mapping from assorted contexts */
++      FIX_APEI_GHES_IRQ,
++      FIX_APEI_GHES_NMI,
++#endif
++
+       __end_of_permanent_fixed_addresses,
+ 
+       /*
+diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
+index 4827176f838d..f9f106e62e74 100644
+--- a/drivers/acpi/apei/ghes.c
++++ b/drivers/acpi/apei/ghes.c
+@@ -51,6 +51,7 @@
+ #include <acpi/actbl1.h>
+ #include <acpi/ghes.h>
+ #include <acpi/apei.h>
++#include <asm/fixmap.h>
+ #include <asm/tlbflush.h>
+ #include <ras/ras_event.h>
+ 
+@@ -112,7 +113,7 @@ static DEFINE_MUTEX(ghes_list_mutex);
+  * Because the memory area used to transfer hardware error information
+  * from BIOS to Linux can be determined only in NMI, IRQ or timer
+  * handler, but general ioremap can not be used in atomic context, so
+- * a special version of atomic ioremap is implemented for that.
++ * the fixmap is used instead.
+  */
+ 
+ /*
+@@ -126,8 +127,8 @@ static DEFINE_MUTEX(ghes_list_mutex);
+ /* virtual memory area for atomic ioremap */
+ static struct vm_struct *ghes_ioremap_area;
+ /*
+- * These 2 spinlock is used to prevent atomic ioremap virtual memory
+- * area from being mapped simultaneously.
++ * These 2 spinlocks are used to prevent the fixmap entries from being used
++ * simultaneously.
+  */
+ static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
+ static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
+@@ -159,53 +160,36 @@ static void ghes_ioremap_exit(void)
+ 
+ static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
+ {
+-      unsigned long vaddr;
+       phys_addr_t paddr;
+       pgprot_t prot;
+ 
+-      vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
+-
+       paddr = pfn << PAGE_SHIFT;
+       prot = arch_apei_get_mem_attribute(paddr);
+-      ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
++      __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot);
+ 
+-      return (void __iomem *)vaddr;
++      return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI);
+ }
+ 
+ static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
+ {
+-      unsigned long vaddr;
+       phys_addr_t paddr;
+       pgprot_t prot;
+ 
+-      vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
+-
+       paddr = pfn << PAGE_SHIFT;
+       prot = arch_apei_get_mem_attribute(paddr);
++      __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot);
+ 
+-      ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
+-
+-      return (void __iomem *)vaddr;
++      return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ);
+ }
+ 
+-static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
++static void ghes_iounmap_nmi(void)
+ {
+-      unsigned long vaddr = (unsigned long __force)vaddr_ptr;
+-      void *base = ghes_ioremap_area->addr;
+-
+-      BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
+-      unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
+-      arch_apei_flush_tlb_one(vaddr);
++      clear_fixmap(FIX_APEI_GHES_NMI);
+ }
+ 
+-static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
++static void ghes_iounmap_irq(void)
+ {
+-      unsigned long vaddr = (unsigned long __force)vaddr_ptr;
+-      void *base = ghes_ioremap_area->addr;
+-
+-      BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
+-      unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
+-      arch_apei_flush_tlb_one(vaddr);
++      clear_fixmap(FIX_APEI_GHES_IRQ);
+ }
+ 
+ static int ghes_estatus_pool_init(void)
+@@ -361,10 +345,10 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
+               paddr += trunk;
+               buffer += trunk;
+               if (in_nmi) {
+-                      ghes_iounmap_nmi(vaddr);
++                      ghes_iounmap_nmi();
+                       raw_spin_unlock(&ghes_ioremap_lock_nmi);
+               } else {
+-                      ghes_iounmap_irq(vaddr);
++                      ghes_iounmap_irq();
+                       spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
+               }
+       }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0120-x86-virt-x86-platform-Merge-struct-x86_hyper-into-st.patch b/patches/kernel/0120-x86-virt-x86-platform-Merge-struct-x86_hyper-into-st.patch

deleted file mode 100644 (file)

index 7fa6116..0000000
--- a/patches/kernel/0120-x86-virt-x86-platform-Merge-struct-x86_hyper-into-st.patch
+++ /dev/null
@@ -1,406 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Juergen Gross <jgross@suse.com>
-Date: Thu, 9 Nov 2017 14:27:35 +0100
-Subject: [PATCH] x86/virt, x86/platform: Merge 'struct x86_hyper' into 'struct
- x86_platform' and 'struct x86_init'
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Instead of x86_hyper being either NULL on bare metal or a pointer to a
-struct hypervisor_x86 in case of the kernel running as a guest merge
-the struct into x86_platform and x86_init.
-
-This will remove the need for wrappers making it hard to find out what
-is being called. With dummy functions added for all callbacks testing
-for a NULL function pointer can be removed, too.
-
-Suggested-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Juergen Gross <jgross@suse.com>
-Acked-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: akataria@vmware.com
-Cc: boris.ostrovsky@oracle.com
-Cc: devel@linuxdriverproject.org
-Cc: haiyangz@microsoft.com
-Cc: kvm@vger.kernel.org
-Cc: kys@microsoft.com
-Cc: pbonzini@redhat.com
-Cc: rkrcmar@redhat.com
-Cc: rusty@rustcorp.com.au
-Cc: sthemmin@microsoft.com
-Cc: virtualization@lists.linux-foundation.org
-Cc: xen-devel@lists.xenproject.org
-Link: http://lkml.kernel.org/r/20171109132739.23465-2-jgross@suse.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit f72e38e8ec8869ac0ba5a75d7d2f897d98a1454e)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 2d0b017b38623bca666acbcb5ab251315845fa55)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/hypervisor.h | 25 ++++--------------
- arch/x86/include/asm/x86_init.h   | 24 +++++++++++++++++
- include/linux/hypervisor.h        |  8 ++++--
- arch/x86/kernel/apic/apic.c       |  2 +-
- arch/x86/kernel/cpu/hypervisor.c  | 54 +++++++++++++++++++--------------------
- arch/x86/kernel/cpu/mshyperv.c    |  2 +-
- arch/x86/kernel/cpu/vmware.c      |  4 +--
- arch/x86/kernel/kvm.c             |  2 +-
- arch/x86/kernel/x86_init.c        |  9 +++++++
- arch/x86/mm/init.c                |  2 +-
- arch/x86/xen/enlighten_hvm.c      |  8 +++---
- arch/x86/xen/enlighten_pv.c       |  2 +-
- 12 files changed, 81 insertions(+), 61 deletions(-)
-
-diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
-index 0ead9dbb9130..0eca7239a7aa 100644
---- a/arch/x86/include/asm/hypervisor.h
-+++ b/arch/x86/include/asm/hypervisor.h
-@@ -23,6 +23,7 @@
- #ifdef CONFIG_HYPERVISOR_GUEST
- 
- #include <asm/kvm_para.h>
-+#include <asm/x86_init.h>
- #include <asm/xen/hypervisor.h>
- 
- /*
-@@ -35,17 +36,11 @@ struct hypervisor_x86 {
-       /* Detection routine */
-       uint32_t        (*detect)(void);
- 
--      /* Platform setup (run once per boot) */
--      void            (*init_platform)(void);
-+      /* init time callbacks */
-+      struct x86_hyper_init init;
- 
--      /* X2APIC detection (run once per boot) */
--      bool            (*x2apic_available)(void);
--
--      /* pin current vcpu to specified physical cpu (run rarely) */
--      void            (*pin_vcpu)(int);
--
--      /* called during init_mem_mapping() to setup early mappings. */
--      void            (*init_mem_mapping)(void);
-+      /* runtime callbacks */
-+      struct x86_hyper_runtime runtime;
- };
- 
- extern const struct hypervisor_x86 *x86_hyper;
-@@ -58,17 +53,7 @@ extern const struct hypervisor_x86 x86_hyper_xen_hvm;
- extern const struct hypervisor_x86 x86_hyper_kvm;
- 
- extern void init_hypervisor_platform(void);
--extern bool hypervisor_x2apic_available(void);
--extern void hypervisor_pin_vcpu(int cpu);
--
--static inline void hypervisor_init_mem_mapping(void)
--{
--      if (x86_hyper && x86_hyper->init_mem_mapping)
--              x86_hyper->init_mem_mapping();
--}
- #else
- static inline void init_hypervisor_platform(void) { }
--static inline bool hypervisor_x2apic_available(void) { return false; }
--static inline void hypervisor_init_mem_mapping(void) { }
- #endif /* CONFIG_HYPERVISOR_GUEST */
- #endif /* _ASM_X86_HYPERVISOR_H */
-diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
-index 7ba7e90a9ad6..4d95e5a13c0b 100644
---- a/arch/x86/include/asm/x86_init.h
-+++ b/arch/x86/include/asm/x86_init.h
-@@ -113,6 +113,18 @@ struct x86_init_pci {
-       void (*fixup_irqs)(void);
- };
- 
-+/**
-+ * struct x86_hyper_init - x86 hypervisor init functions
-+ * @init_platform:            platform setup
-+ * @x2apic_available:         X2APIC detection
-+ * @init_mem_mapping:         setup early mappings during init_mem_mapping()
-+ */
-+struct x86_hyper_init {
-+      void (*init_platform)(void);
-+      bool (*x2apic_available)(void);
-+      void (*init_mem_mapping)(void);
-+};
-+
- /**
-  * struct x86_init_ops - functions for platform specific setup
-  *
-@@ -126,6 +138,7 @@ struct x86_init_ops {
-       struct x86_init_timers          timers;
-       struct x86_init_iommu           iommu;
-       struct x86_init_pci             pci;
-+      struct x86_hyper_init           hyper;
- };
- 
- /**
-@@ -198,6 +211,15 @@ struct x86_legacy_features {
-       struct x86_legacy_devices devices;
- };
- 
-+/**
-+ * struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks
-+ *
-+ * @pin_vcpu:         pin current vcpu to specified physical cpu (run rarely)
-+ */
-+struct x86_hyper_runtime {
-+      void (*pin_vcpu)(int cpu);
-+};
-+
- /**
-  * struct x86_platform_ops - platform specific runtime functions
-  * @calibrate_cpu:            calibrate CPU
-@@ -217,6 +239,7 @@ struct x86_legacy_features {
-  *                            possible in x86_early_init_platform_quirks() by
-  *                            only using the current x86_hardware_subarch
-  *                            semantics.
-+ * @hyper:                    x86 hypervisor specific runtime callbacks
-  */
- struct x86_platform_ops {
-       unsigned long (*calibrate_cpu)(void);
-@@ -232,6 +255,7 @@ struct x86_platform_ops {
-       void (*apic_post_init)(void);
-       struct x86_legacy_features legacy;
-       void (*set_legacy_features)(void);
-+      struct x86_hyper_runtime hyper;
- };
- 
- struct pci_dev;
-diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h
-index 3fa5ef2b3759..35e170ca87a8 100644
---- a/include/linux/hypervisor.h
-+++ b/include/linux/hypervisor.h
-@@ -6,8 +6,12 @@
-  *            Juergen Gross <jgross@suse.com>
-  */
- 
--#ifdef CONFIG_HYPERVISOR_GUEST
--#include <asm/hypervisor.h>
-+#ifdef CONFIG_X86
-+#include <asm/x86_init.h>
-+static inline void hypervisor_pin_vcpu(int cpu)
-+{
-+      x86_platform.hyper.pin_vcpu(cpu);
-+}
- #else
- static inline void hypervisor_pin_vcpu(int cpu)
- {
-diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
-index 4a7f962b53ff..bb63c1350524 100644
---- a/arch/x86/kernel/apic/apic.c
-+++ b/arch/x86/kernel/apic/apic.c
-@@ -1666,7 +1666,7 @@ static __init void try_to_enable_x2apic(int remap_mode)
-                * under KVM
-                */
-               if (max_physical_apicid > 255 ||
--                  !hypervisor_x2apic_available()) {
-+                  !x86_init.hyper.x2apic_available()) {
-                       pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
-                       x2apic_disable();
-                       return;
-diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
-index 4fa90006ac68..22226c1bf092 100644
---- a/arch/x86/kernel/cpu/hypervisor.c
-+++ b/arch/x86/kernel/cpu/hypervisor.c
-@@ -44,51 +44,49 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
- const struct hypervisor_x86 *x86_hyper;
- EXPORT_SYMBOL(x86_hyper);
- 
--static inline void __init
-+static inline const struct hypervisor_x86 * __init
- detect_hypervisor_vendor(void)
- {
--      const struct hypervisor_x86 *h, * const *p;
-+      const struct hypervisor_x86 *h = NULL, * const *p;
-       uint32_t pri, max_pri = 0;
- 
-       for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
--              h = *p;
--              pri = h->detect();
--              if (pri != 0 && pri > max_pri) {
-+              pri = (*p)->detect();
-+              if (pri > max_pri) {
-                       max_pri = pri;
--                      x86_hyper = h;
-+                      h = *p;
-               }
-       }
- 
--      if (max_pri)
--              pr_info("Hypervisor detected: %s\n", x86_hyper->name);
-+      if (h)
-+              pr_info("Hypervisor detected: %s\n", h->name);
-+
-+      return h;
- }
- 
--void __init init_hypervisor_platform(void)
-+static void __init copy_array(const void *src, void *target, unsigned int size)
- {
-+      unsigned int i, n = size / sizeof(void *);
-+      const void * const *from = (const void * const *)src;
-+      const void **to = (const void **)target;
- 
--      detect_hypervisor_vendor();
--
--      if (!x86_hyper)
--              return;
--
--      if (x86_hyper->init_platform)
--              x86_hyper->init_platform();
-+      for (i = 0; i < n; i++)
-+              if (from[i])
-+                      to[i] = from[i];
- }
- 
--bool __init hypervisor_x2apic_available(void)
-+void __init init_hypervisor_platform(void)
- {
--      return x86_hyper                   &&
--             x86_hyper->x2apic_available &&
--             x86_hyper->x2apic_available();
--}
-+      const struct hypervisor_x86 *h;
- 
--void hypervisor_pin_vcpu(int cpu)
--{
--      if (!x86_hyper)
-+      h = detect_hypervisor_vendor();
-+
-+      if (!h)
-               return;
- 
--      if (x86_hyper->pin_vcpu)
--              x86_hyper->pin_vcpu(cpu);
--      else
--              WARN_ONCE(1, "vcpu pinning requested but not supported!\n");
-+      copy_array(&h->init, &x86_init.hyper, sizeof(h->init));
-+      copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime));
-+
-+      x86_hyper = h;
-+      x86_init.hyper.init_platform();
- }
-diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
-index 70e717fccdd6..9707e431da27 100644
---- a/arch/x86/kernel/cpu/mshyperv.c
-+++ b/arch/x86/kernel/cpu/mshyperv.c
-@@ -255,6 +255,6 @@ static void __init ms_hyperv_init_platform(void)
- const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
-       .name                   = "Microsoft HyperV",
-       .detect                 = ms_hyperv_platform,
--      .init_platform          = ms_hyperv_init_platform,
-+      .init.init_platform     = ms_hyperv_init_platform,
- };
- EXPORT_SYMBOL(x86_hyper_ms_hyperv);
-diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
-index 40ed26852ebd..4804c1d063c8 100644
---- a/arch/x86/kernel/cpu/vmware.c
-+++ b/arch/x86/kernel/cpu/vmware.c
-@@ -208,7 +208,7 @@ static bool __init vmware_legacy_x2apic_available(void)
- const __refconst struct hypervisor_x86 x86_hyper_vmware = {
-       .name                   = "VMware",
-       .detect                 = vmware_platform,
--      .init_platform          = vmware_platform_setup,
--      .x2apic_available       = vmware_legacy_x2apic_available,
-+      .init.init_platform     = vmware_platform_setup,
-+      .init.x2apic_available  = vmware_legacy_x2apic_available,
- };
- EXPORT_SYMBOL(x86_hyper_vmware);
-diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
-index 9e3798b00e40..54e373bfeab9 100644
---- a/arch/x86/kernel/kvm.c
-+++ b/arch/x86/kernel/kvm.c
-@@ -547,7 +547,7 @@ static uint32_t __init kvm_detect(void)
- const struct hypervisor_x86 x86_hyper_kvm __refconst = {
-       .name                   = "KVM",
-       .detect                 = kvm_detect,
--      .x2apic_available       = kvm_para_available,
-+      .init.x2apic_available  = kvm_para_available,
- };
- EXPORT_SYMBOL_GPL(x86_hyper_kvm);
- 
-diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
-index a088b2c47f73..5b2d10c1973a 100644
---- a/arch/x86/kernel/x86_init.c
-+++ b/arch/x86/kernel/x86_init.c
-@@ -28,6 +28,8 @@ void x86_init_noop(void) { }
- void __init x86_init_uint_noop(unsigned int unused) { }
- int __init iommu_init_noop(void) { return 0; }
- void iommu_shutdown_noop(void) { }
-+bool __init bool_x86_init_noop(void) { return false; }
-+void x86_op_int_noop(int cpu) { }
- 
- /*
-  * The platform setup functions are preset with the default functions
-@@ -81,6 +83,12 @@ struct x86_init_ops x86_init __initdata = {
-               .init_irq               = x86_default_pci_init_irq,
-               .fixup_irqs             = x86_default_pci_fixup_irqs,
-       },
-+
-+      .hyper = {
-+              .init_platform          = x86_init_noop,
-+              .x2apic_available       = bool_x86_init_noop,
-+              .init_mem_mapping       = x86_init_noop,
-+      },
- };
- 
- struct x86_cpuinit_ops x86_cpuinit = {
-@@ -101,6 +109,7 @@ struct x86_platform_ops x86_platform __ro_after_init = {
-       .get_nmi_reason                 = default_get_nmi_reason,
-       .save_sched_clock_state         = tsc_save_sched_clock_state,
-       .restore_sched_clock_state      = tsc_restore_sched_clock_state,
-+      .hyper.pin_vcpu                 = x86_op_int_noop,
- };
- 
- EXPORT_SYMBOL_GPL(x86_platform);
-diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
-index af5c1ed21d43..a22c2b95e513 100644
---- a/arch/x86/mm/init.c
-+++ b/arch/x86/mm/init.c
-@@ -671,7 +671,7 @@ void __init init_mem_mapping(void)
-       load_cr3(swapper_pg_dir);
-       __flush_tlb_all();
- 
--      hypervisor_init_mem_mapping();
-+      x86_init.hyper.init_mem_mapping();
- 
-       early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
- }
-diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
-index de503c225ae1..7b1622089f96 100644
---- a/arch/x86/xen/enlighten_hvm.c
-+++ b/arch/x86/xen/enlighten_hvm.c
-@@ -229,9 +229,9 @@ static uint32_t __init xen_platform_hvm(void)
- const struct hypervisor_x86 x86_hyper_xen_hvm = {
-       .name                   = "Xen HVM",
-       .detect                 = xen_platform_hvm,
--      .init_platform          = xen_hvm_guest_init,
--      .pin_vcpu               = xen_pin_vcpu,
--      .x2apic_available       = xen_x2apic_para_available,
--      .init_mem_mapping       = xen_hvm_init_mem_mapping,
-+      .init.init_platform     = xen_hvm_guest_init,
-+      .init.x2apic_available  = xen_x2apic_para_available,
-+      .init.init_mem_mapping  = xen_hvm_init_mem_mapping,
-+      .runtime.pin_vcpu       = xen_pin_vcpu,
- };
- EXPORT_SYMBOL(x86_hyper_xen_hvm);
-diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
-index e7b213047724..4110fc9e5ee9 100644
---- a/arch/x86/xen/enlighten_pv.c
-+++ b/arch/x86/xen/enlighten_pv.c
-@@ -1461,6 +1461,6 @@ static uint32_t __init xen_platform_pv(void)
- const struct hypervisor_x86 x86_hyper_xen_pv = {
-       .name                   = "Xen PV",
-       .detect                 = xen_platform_pv,
--      .pin_vcpu               = xen_pin_vcpu,
-+      .runtime.pin_vcpu       = xen_pin_vcpu,
- };
- EXPORT_SYMBOL(x86_hyper_xen_pv);
--- 
-2.14.2
-
diff --git a/patches/kernel/0121-x86-virt-Add-enum-for-hypervisors-to-replace-x86_hyp.patch b/patches/kernel/0121-x86-virt-Add-enum-for-hypervisors-to-replace-x86_hyp.patch

deleted file mode 100644 (file)

index 02762ba..0000000
--- a/patches/kernel/0121-x86-virt-Add-enum-for-hypervisors-to-replace-x86_hyp.patch
+++ /dev/null
@@ -1,301 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Juergen Gross <jgross@suse.com>
-Date: Thu, 9 Nov 2017 14:27:36 +0100
-Subject: [PATCH] x86/virt: Add enum for hypervisors to replace x86_hyper
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The x86_hyper pointer is only used for checking whether a virtual
-device is supporting the hypervisor the system is running on.
-
-Use an enum for that purpose instead and drop the x86_hyper pointer.
-
-Signed-off-by: Juergen Gross <jgross@suse.com>
-Acked-by: Thomas Gleixner <tglx@linutronix.de>
-Acked-by: Xavier Deguillard <xdeguillard@vmware.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: akataria@vmware.com
-Cc: arnd@arndb.de
-Cc: boris.ostrovsky@oracle.com
-Cc: devel@linuxdriverproject.org
-Cc: dmitry.torokhov@gmail.com
-Cc: gregkh@linuxfoundation.org
-Cc: haiyangz@microsoft.com
-Cc: kvm@vger.kernel.org
-Cc: kys@microsoft.com
-Cc: linux-graphics-maintainer@vmware.com
-Cc: linux-input@vger.kernel.org
-Cc: moltmann@vmware.com
-Cc: pbonzini@redhat.com
-Cc: pv-drivers@vmware.com
-Cc: rkrcmar@redhat.com
-Cc: sthemmin@microsoft.com
-Cc: virtualization@lists.linux-foundation.org
-Cc: xen-devel@lists.xenproject.org
-Link: http://lkml.kernel.org/r/20171109132739.23465-3-jgross@suse.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit 03b2a320b19f1424e9ac9c21696be9c60b6d0d93)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit c24b0a226fadfe1abe78fa568ff84fea6ecd7ca5)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/hypervisor.h | 23 ++++++++++++++---------
- arch/x86/hyperv/hv_init.c         |  2 +-
- arch/x86/kernel/cpu/hypervisor.c  | 12 +++++++++---
- arch/x86/kernel/cpu/mshyperv.c    |  6 +++---
- arch/x86/kernel/cpu/vmware.c      |  4 ++--
- arch/x86/kernel/kvm.c             |  4 ++--
- arch/x86/xen/enlighten_hvm.c      |  4 ++--
- arch/x86/xen/enlighten_pv.c       |  4 ++--
- drivers/hv/vmbus_drv.c            |  2 +-
- drivers/input/mouse/vmmouse.c     | 10 ++++------
- drivers/misc/vmw_balloon.c        |  2 +-
- 11 files changed, 41 insertions(+), 32 deletions(-)
-
-diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
-index 0eca7239a7aa..1b0a5abcd8ae 100644
---- a/arch/x86/include/asm/hypervisor.h
-+++ b/arch/x86/include/asm/hypervisor.h
-@@ -29,6 +29,16 @@
- /*
-  * x86 hypervisor information
-  */
-+
-+enum x86_hypervisor_type {
-+      X86_HYPER_NATIVE = 0,
-+      X86_HYPER_VMWARE,
-+      X86_HYPER_MS_HYPERV,
-+      X86_HYPER_XEN_PV,
-+      X86_HYPER_XEN_HVM,
-+      X86_HYPER_KVM,
-+};
-+
- struct hypervisor_x86 {
-       /* Hypervisor name */
-       const char      *name;
-@@ -36,6 +46,9 @@ struct hypervisor_x86 {
-       /* Detection routine */
-       uint32_t        (*detect)(void);
- 
-+      /* Hypervisor type */
-+      enum x86_hypervisor_type type;
-+
-       /* init time callbacks */
-       struct x86_hyper_init init;
- 
-@@ -43,15 +56,7 @@ struct hypervisor_x86 {
-       struct x86_hyper_runtime runtime;
- };
- 
--extern const struct hypervisor_x86 *x86_hyper;
--
--/* Recognized hypervisors */
--extern const struct hypervisor_x86 x86_hyper_vmware;
--extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
--extern const struct hypervisor_x86 x86_hyper_xen_pv;
--extern const struct hypervisor_x86 x86_hyper_xen_hvm;
--extern const struct hypervisor_x86 x86_hyper_kvm;
--
-+extern enum x86_hypervisor_type x86_hyper_type;
- extern void init_hypervisor_platform(void);
- #else
- static inline void init_hypervisor_platform(void) { }
-diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
-index ec7c9661743f..32fa894139d5 100644
---- a/arch/x86/hyperv/hv_init.c
-+++ b/arch/x86/hyperv/hv_init.c
-@@ -99,7 +99,7 @@ void hyperv_init(void)
-       u64 guest_id;
-       union hv_x64_msr_hypercall_contents hypercall_msr;
- 
--      if (x86_hyper != &x86_hyper_ms_hyperv)
-+      if (x86_hyper_type != X86_HYPER_MS_HYPERV)
-               return;
- 
-       /*
-diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
-index 22226c1bf092..bea8d3e24f50 100644
---- a/arch/x86/kernel/cpu/hypervisor.c
-+++ b/arch/x86/kernel/cpu/hypervisor.c
-@@ -26,6 +26,12 @@
- #include <asm/processor.h>
- #include <asm/hypervisor.h>
- 
-+extern const struct hypervisor_x86 x86_hyper_vmware;
-+extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
-+extern const struct hypervisor_x86 x86_hyper_xen_pv;
-+extern const struct hypervisor_x86 x86_hyper_xen_hvm;
-+extern const struct hypervisor_x86 x86_hyper_kvm;
-+
- static const __initconst struct hypervisor_x86 * const hypervisors[] =
- {
- #ifdef CONFIG_XEN_PV
-@@ -41,8 +47,8 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
- #endif
- };
- 
--const struct hypervisor_x86 *x86_hyper;
--EXPORT_SYMBOL(x86_hyper);
-+enum x86_hypervisor_type x86_hyper_type;
-+EXPORT_SYMBOL(x86_hyper_type);
- 
- static inline const struct hypervisor_x86 * __init
- detect_hypervisor_vendor(void)
-@@ -87,6 +93,6 @@ void __init init_hypervisor_platform(void)
-       copy_array(&h->init, &x86_init.hyper, sizeof(h->init));
-       copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime));
- 
--      x86_hyper = h;
-+      x86_hyper_type = h->type;
-       x86_init.hyper.init_platform();
- }
-diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
-index 9707e431da27..3672f1192119 100644
---- a/arch/x86/kernel/cpu/mshyperv.c
-+++ b/arch/x86/kernel/cpu/mshyperv.c
-@@ -252,9 +252,9 @@ static void __init ms_hyperv_init_platform(void)
- #endif
- }
- 
--const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
--      .name                   = "Microsoft HyperV",
-+const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
-+      .name                   = "Microsoft Hyper-V",
-       .detect                 = ms_hyperv_platform,
-+      .type                   = X86_HYPER_MS_HYPERV,
-       .init.init_platform     = ms_hyperv_init_platform,
- };
--EXPORT_SYMBOL(x86_hyper_ms_hyperv);
-diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
-index 4804c1d063c8..8e005329648b 100644
---- a/arch/x86/kernel/cpu/vmware.c
-+++ b/arch/x86/kernel/cpu/vmware.c
-@@ -205,10 +205,10 @@ static bool __init vmware_legacy_x2apic_available(void)
-              (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
- }
- 
--const __refconst struct hypervisor_x86 x86_hyper_vmware = {
-+const __initconst struct hypervisor_x86 x86_hyper_vmware = {
-       .name                   = "VMware",
-       .detect                 = vmware_platform,
-+      .type                   = X86_HYPER_VMWARE,
-       .init.init_platform     = vmware_platform_setup,
-       .init.x2apic_available  = vmware_legacy_x2apic_available,
- };
--EXPORT_SYMBOL(x86_hyper_vmware);
-diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
-index 54e373bfeab9..b65a51a24647 100644
---- a/arch/x86/kernel/kvm.c
-+++ b/arch/x86/kernel/kvm.c
-@@ -544,12 +544,12 @@ static uint32_t __init kvm_detect(void)
-       return kvm_cpuid_base();
- }
- 
--const struct hypervisor_x86 x86_hyper_kvm __refconst = {
-+const __initconst struct hypervisor_x86 x86_hyper_kvm = {
-       .name                   = "KVM",
-       .detect                 = kvm_detect,
-+      .type                   = X86_HYPER_KVM,
-       .init.x2apic_available  = kvm_para_available,
- };
--EXPORT_SYMBOL_GPL(x86_hyper_kvm);
- 
- static __init int activate_jump_labels(void)
- {
-diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
-index 7b1622089f96..754d5391d9fa 100644
---- a/arch/x86/xen/enlighten_hvm.c
-+++ b/arch/x86/xen/enlighten_hvm.c
-@@ -226,12 +226,12 @@ static uint32_t __init xen_platform_hvm(void)
-       return xen_cpuid_base();
- }
- 
--const struct hypervisor_x86 x86_hyper_xen_hvm = {
-+const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = {
-       .name                   = "Xen HVM",
-       .detect                 = xen_platform_hvm,
-+      .type                   = X86_HYPER_XEN_HVM,
-       .init.init_platform     = xen_hvm_guest_init,
-       .init.x2apic_available  = xen_x2apic_para_available,
-       .init.init_mem_mapping  = xen_hvm_init_mem_mapping,
-       .runtime.pin_vcpu       = xen_pin_vcpu,
- };
--EXPORT_SYMBOL(x86_hyper_xen_hvm);
-diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
-index 4110fc9e5ee9..63c81154083b 100644
---- a/arch/x86/xen/enlighten_pv.c
-+++ b/arch/x86/xen/enlighten_pv.c
-@@ -1458,9 +1458,9 @@ static uint32_t __init xen_platform_pv(void)
-       return 0;
- }
- 
--const struct hypervisor_x86 x86_hyper_xen_pv = {
-+const __initconst struct hypervisor_x86 x86_hyper_xen_pv = {
-       .name                   = "Xen PV",
-       .detect                 = xen_platform_pv,
-+      .type                   = X86_HYPER_XEN_PV,
-       .runtime.pin_vcpu       = xen_pin_vcpu,
- };
--EXPORT_SYMBOL(x86_hyper_xen_pv);
-diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
-index 5ad627044dd1..8aca7613e482 100644
---- a/drivers/hv/vmbus_drv.c
-+++ b/drivers/hv/vmbus_drv.c
-@@ -1551,7 +1551,7 @@ static int __init hv_acpi_init(void)
- {
-       int ret, t;
- 
--      if (x86_hyper != &x86_hyper_ms_hyperv)
-+      if (x86_hyper_type != X86_HYPER_MS_HYPERV)
-               return -ENODEV;
- 
-       init_completion(&probe_event);
-diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c
-index 0f586780ceb4..1ae5c1ef3f5b 100644
---- a/drivers/input/mouse/vmmouse.c
-+++ b/drivers/input/mouse/vmmouse.c
-@@ -316,11 +316,9 @@ static int vmmouse_enable(struct psmouse *psmouse)
- /*
-  * Array of supported hypervisors.
-  */
--static const struct hypervisor_x86 *vmmouse_supported_hypervisors[] = {
--      &x86_hyper_vmware,
--#ifdef CONFIG_KVM_GUEST
--      &x86_hyper_kvm,
--#endif
-+static enum x86_hypervisor_type vmmouse_supported_hypervisors[] = {
-+      X86_HYPER_VMWARE,
-+      X86_HYPER_KVM,
- };
- 
- /**
-@@ -331,7 +329,7 @@ static bool vmmouse_check_hypervisor(void)
-       int i;
- 
-       for (i = 0; i < ARRAY_SIZE(vmmouse_supported_hypervisors); i++)
--              if (vmmouse_supported_hypervisors[i] == x86_hyper)
-+              if (vmmouse_supported_hypervisors[i] == x86_hyper_type)
-                       return true;
- 
-       return false;
-diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
-index 1e688bfec567..9047c0a529b2 100644
---- a/drivers/misc/vmw_balloon.c
-+++ b/drivers/misc/vmw_balloon.c
-@@ -1271,7 +1271,7 @@ static int __init vmballoon_init(void)
-        * Check if we are running on VMware's hypervisor and bail out
-        * if we are not.
-        */
--      if (x86_hyper != &x86_hyper_vmware)
-+      if (x86_hyper_type != X86_HYPER_VMWARE)
-               return -ENODEV;
- 
-       for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
--- 
-2.14.2
-
diff --git a/patches/kernel/0121-x86-virt-x86-platform-Merge-struct-x86_hyper-into-st.patch b/patches/kernel/0121-x86-virt-x86-platform-Merge-struct-x86_hyper-into-st.patch

new file mode 100644 (file)

index 0000000..7fa6116
--- /dev/null
+++ b/patches/kernel/0121-x86-virt-x86-platform-Merge-struct-x86_hyper-into-st.patch
@@ -0,0 +1,406 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 9 Nov 2017 14:27:35 +0100
+Subject: [PATCH] x86/virt, x86/platform: Merge 'struct x86_hyper' into 'struct
+ x86_platform' and 'struct x86_init'
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Instead of x86_hyper being either NULL on bare metal or a pointer to a
+struct hypervisor_x86 in case of the kernel running as a guest merge
+the struct into x86_platform and x86_init.
+
+This will remove the need for wrappers making it hard to find out what
+is being called. With dummy functions added for all callbacks testing
+for a NULL function pointer can be removed, too.
+
+Suggested-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: akataria@vmware.com
+Cc: boris.ostrovsky@oracle.com
+Cc: devel@linuxdriverproject.org
+Cc: haiyangz@microsoft.com
+Cc: kvm@vger.kernel.org
+Cc: kys@microsoft.com
+Cc: pbonzini@redhat.com
+Cc: rkrcmar@redhat.com
+Cc: rusty@rustcorp.com.au
+Cc: sthemmin@microsoft.com
+Cc: virtualization@lists.linux-foundation.org
+Cc: xen-devel@lists.xenproject.org
+Link: http://lkml.kernel.org/r/20171109132739.23465-2-jgross@suse.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit f72e38e8ec8869ac0ba5a75d7d2f897d98a1454e)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 2d0b017b38623bca666acbcb5ab251315845fa55)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/hypervisor.h | 25 ++++--------------
+ arch/x86/include/asm/x86_init.h   | 24 +++++++++++++++++
+ include/linux/hypervisor.h        |  8 ++++--
+ arch/x86/kernel/apic/apic.c       |  2 +-
+ arch/x86/kernel/cpu/hypervisor.c  | 54 +++++++++++++++++++--------------------
+ arch/x86/kernel/cpu/mshyperv.c    |  2 +-
+ arch/x86/kernel/cpu/vmware.c      |  4 +--
+ arch/x86/kernel/kvm.c             |  2 +-
+ arch/x86/kernel/x86_init.c        |  9 +++++++
+ arch/x86/mm/init.c                |  2 +-
+ arch/x86/xen/enlighten_hvm.c      |  8 +++---
+ arch/x86/xen/enlighten_pv.c       |  2 +-
+ 12 files changed, 81 insertions(+), 61 deletions(-)
+
+diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
+index 0ead9dbb9130..0eca7239a7aa 100644
+--- a/arch/x86/include/asm/hypervisor.h
++++ b/arch/x86/include/asm/hypervisor.h
+@@ -23,6 +23,7 @@
+ #ifdef CONFIG_HYPERVISOR_GUEST
+ 
+ #include <asm/kvm_para.h>
++#include <asm/x86_init.h>
+ #include <asm/xen/hypervisor.h>
+ 
+ /*
+@@ -35,17 +36,11 @@ struct hypervisor_x86 {
+       /* Detection routine */
+       uint32_t        (*detect)(void);
+ 
+-      /* Platform setup (run once per boot) */
+-      void            (*init_platform)(void);
++      /* init time callbacks */
++      struct x86_hyper_init init;
+ 
+-      /* X2APIC detection (run once per boot) */
+-      bool            (*x2apic_available)(void);
+-
+-      /* pin current vcpu to specified physical cpu (run rarely) */
+-      void            (*pin_vcpu)(int);
+-
+-      /* called during init_mem_mapping() to setup early mappings. */
+-      void            (*init_mem_mapping)(void);
++      /* runtime callbacks */
++      struct x86_hyper_runtime runtime;
+ };
+ 
+ extern const struct hypervisor_x86 *x86_hyper;
+@@ -58,17 +53,7 @@ extern const struct hypervisor_x86 x86_hyper_xen_hvm;
+ extern const struct hypervisor_x86 x86_hyper_kvm;
+ 
+ extern void init_hypervisor_platform(void);
+-extern bool hypervisor_x2apic_available(void);
+-extern void hypervisor_pin_vcpu(int cpu);
+-
+-static inline void hypervisor_init_mem_mapping(void)
+-{
+-      if (x86_hyper && x86_hyper->init_mem_mapping)
+-              x86_hyper->init_mem_mapping();
+-}
+ #else
+ static inline void init_hypervisor_platform(void) { }
+-static inline bool hypervisor_x2apic_available(void) { return false; }
+-static inline void hypervisor_init_mem_mapping(void) { }
+ #endif /* CONFIG_HYPERVISOR_GUEST */
+ #endif /* _ASM_X86_HYPERVISOR_H */
+diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
+index 7ba7e90a9ad6..4d95e5a13c0b 100644
+--- a/arch/x86/include/asm/x86_init.h
++++ b/arch/x86/include/asm/x86_init.h
+@@ -113,6 +113,18 @@ struct x86_init_pci {
+       void (*fixup_irqs)(void);
+ };
+ 
++/**
++ * struct x86_hyper_init - x86 hypervisor init functions
++ * @init_platform:            platform setup
++ * @x2apic_available:         X2APIC detection
++ * @init_mem_mapping:         setup early mappings during init_mem_mapping()
++ */
++struct x86_hyper_init {
++      void (*init_platform)(void);
++      bool (*x2apic_available)(void);
++      void (*init_mem_mapping)(void);
++};
++
+ /**
+  * struct x86_init_ops - functions for platform specific setup
+  *
+@@ -126,6 +138,7 @@ struct x86_init_ops {
+       struct x86_init_timers          timers;
+       struct x86_init_iommu           iommu;
+       struct x86_init_pci             pci;
++      struct x86_hyper_init           hyper;
+ };
+ 
+ /**
+@@ -198,6 +211,15 @@ struct x86_legacy_features {
+       struct x86_legacy_devices devices;
+ };
+ 
++/**
++ * struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks
++ *
++ * @pin_vcpu:         pin current vcpu to specified physical cpu (run rarely)
++ */
++struct x86_hyper_runtime {
++      void (*pin_vcpu)(int cpu);
++};
++
+ /**
+  * struct x86_platform_ops - platform specific runtime functions
+  * @calibrate_cpu:            calibrate CPU
+@@ -217,6 +239,7 @@ struct x86_legacy_features {
+  *                            possible in x86_early_init_platform_quirks() by
+  *                            only using the current x86_hardware_subarch
+  *                            semantics.
++ * @hyper:                    x86 hypervisor specific runtime callbacks
+  */
+ struct x86_platform_ops {
+       unsigned long (*calibrate_cpu)(void);
+@@ -232,6 +255,7 @@ struct x86_platform_ops {
+       void (*apic_post_init)(void);
+       struct x86_legacy_features legacy;
+       void (*set_legacy_features)(void);
++      struct x86_hyper_runtime hyper;
+ };
+ 
+ struct pci_dev;
+diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h
+index 3fa5ef2b3759..35e170ca87a8 100644
+--- a/include/linux/hypervisor.h
++++ b/include/linux/hypervisor.h
+@@ -6,8 +6,12 @@
+  *            Juergen Gross <jgross@suse.com>
+  */
+ 
+-#ifdef CONFIG_HYPERVISOR_GUEST
+-#include <asm/hypervisor.h>
++#ifdef CONFIG_X86
++#include <asm/x86_init.h>
++static inline void hypervisor_pin_vcpu(int cpu)
++{
++      x86_platform.hyper.pin_vcpu(cpu);
++}
+ #else
+ static inline void hypervisor_pin_vcpu(int cpu)
+ {
+diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
+index 4a7f962b53ff..bb63c1350524 100644
+--- a/arch/x86/kernel/apic/apic.c
++++ b/arch/x86/kernel/apic/apic.c
+@@ -1666,7 +1666,7 @@ static __init void try_to_enable_x2apic(int remap_mode)
+                * under KVM
+                */
+               if (max_physical_apicid > 255 ||
+-                  !hypervisor_x2apic_available()) {
++                  !x86_init.hyper.x2apic_available()) {
+                       pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
+                       x2apic_disable();
+                       return;
+diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
+index 4fa90006ac68..22226c1bf092 100644
+--- a/arch/x86/kernel/cpu/hypervisor.c
++++ b/arch/x86/kernel/cpu/hypervisor.c
+@@ -44,51 +44,49 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
+ const struct hypervisor_x86 *x86_hyper;
+ EXPORT_SYMBOL(x86_hyper);
+ 
+-static inline void __init
++static inline const struct hypervisor_x86 * __init
+ detect_hypervisor_vendor(void)
+ {
+-      const struct hypervisor_x86 *h, * const *p;
++      const struct hypervisor_x86 *h = NULL, * const *p;
+       uint32_t pri, max_pri = 0;
+ 
+       for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
+-              h = *p;
+-              pri = h->detect();
+-              if (pri != 0 && pri > max_pri) {
++              pri = (*p)->detect();
++              if (pri > max_pri) {
+                       max_pri = pri;
+-                      x86_hyper = h;
++                      h = *p;
+               }
+       }
+ 
+-      if (max_pri)
+-              pr_info("Hypervisor detected: %s\n", x86_hyper->name);
++      if (h)
++              pr_info("Hypervisor detected: %s\n", h->name);
++
++      return h;
+ }
+ 
+-void __init init_hypervisor_platform(void)
++static void __init copy_array(const void *src, void *target, unsigned int size)
+ {
++      unsigned int i, n = size / sizeof(void *);
++      const void * const *from = (const void * const *)src;
++      const void **to = (const void **)target;
+ 
+-      detect_hypervisor_vendor();
+-
+-      if (!x86_hyper)
+-              return;
+-
+-      if (x86_hyper->init_platform)
+-              x86_hyper->init_platform();
++      for (i = 0; i < n; i++)
++              if (from[i])
++                      to[i] = from[i];
+ }
+ 
+-bool __init hypervisor_x2apic_available(void)
++void __init init_hypervisor_platform(void)
+ {
+-      return x86_hyper                   &&
+-             x86_hyper->x2apic_available &&
+-             x86_hyper->x2apic_available();
+-}
++      const struct hypervisor_x86 *h;
+ 
+-void hypervisor_pin_vcpu(int cpu)
+-{
+-      if (!x86_hyper)
++      h = detect_hypervisor_vendor();
++
++      if (!h)
+               return;
+ 
+-      if (x86_hyper->pin_vcpu)
+-              x86_hyper->pin_vcpu(cpu);
+-      else
+-              WARN_ONCE(1, "vcpu pinning requested but not supported!\n");
++      copy_array(&h->init, &x86_init.hyper, sizeof(h->init));
++      copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime));
++
++      x86_hyper = h;
++      x86_init.hyper.init_platform();
+ }
+diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
+index 70e717fccdd6..9707e431da27 100644
+--- a/arch/x86/kernel/cpu/mshyperv.c
++++ b/arch/x86/kernel/cpu/mshyperv.c
+@@ -255,6 +255,6 @@ static void __init ms_hyperv_init_platform(void)
+ const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
+       .name                   = "Microsoft HyperV",
+       .detect                 = ms_hyperv_platform,
+-      .init_platform          = ms_hyperv_init_platform,
++      .init.init_platform     = ms_hyperv_init_platform,
+ };
+ EXPORT_SYMBOL(x86_hyper_ms_hyperv);
+diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
+index 40ed26852ebd..4804c1d063c8 100644
+--- a/arch/x86/kernel/cpu/vmware.c
++++ b/arch/x86/kernel/cpu/vmware.c
+@@ -208,7 +208,7 @@ static bool __init vmware_legacy_x2apic_available(void)
+ const __refconst struct hypervisor_x86 x86_hyper_vmware = {
+       .name                   = "VMware",
+       .detect                 = vmware_platform,
+-      .init_platform          = vmware_platform_setup,
+-      .x2apic_available       = vmware_legacy_x2apic_available,
++      .init.init_platform     = vmware_platform_setup,
++      .init.x2apic_available  = vmware_legacy_x2apic_available,
+ };
+ EXPORT_SYMBOL(x86_hyper_vmware);
+diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
+index 9e3798b00e40..54e373bfeab9 100644
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -547,7 +547,7 @@ static uint32_t __init kvm_detect(void)
+ const struct hypervisor_x86 x86_hyper_kvm __refconst = {
+       .name                   = "KVM",
+       .detect                 = kvm_detect,
+-      .x2apic_available       = kvm_para_available,
++      .init.x2apic_available  = kvm_para_available,
+ };
+ EXPORT_SYMBOL_GPL(x86_hyper_kvm);
+ 
+diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
+index a088b2c47f73..5b2d10c1973a 100644
+--- a/arch/x86/kernel/x86_init.c
++++ b/arch/x86/kernel/x86_init.c
+@@ -28,6 +28,8 @@ void x86_init_noop(void) { }
+ void __init x86_init_uint_noop(unsigned int unused) { }
+ int __init iommu_init_noop(void) { return 0; }
+ void iommu_shutdown_noop(void) { }
++bool __init bool_x86_init_noop(void) { return false; }
++void x86_op_int_noop(int cpu) { }
+ 
+ /*
+  * The platform setup functions are preset with the default functions
+@@ -81,6 +83,12 @@ struct x86_init_ops x86_init __initdata = {
+               .init_irq               = x86_default_pci_init_irq,
+               .fixup_irqs             = x86_default_pci_fixup_irqs,
+       },
++
++      .hyper = {
++              .init_platform          = x86_init_noop,
++              .x2apic_available       = bool_x86_init_noop,
++              .init_mem_mapping       = x86_init_noop,
++      },
+ };
+ 
+ struct x86_cpuinit_ops x86_cpuinit = {
+@@ -101,6 +109,7 @@ struct x86_platform_ops x86_platform __ro_after_init = {
+       .get_nmi_reason                 = default_get_nmi_reason,
+       .save_sched_clock_state         = tsc_save_sched_clock_state,
+       .restore_sched_clock_state      = tsc_restore_sched_clock_state,
++      .hyper.pin_vcpu                 = x86_op_int_noop,
+ };
+ 
+ EXPORT_SYMBOL_GPL(x86_platform);
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index af5c1ed21d43..a22c2b95e513 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -671,7 +671,7 @@ void __init init_mem_mapping(void)
+       load_cr3(swapper_pg_dir);
+       __flush_tlb_all();
+ 
+-      hypervisor_init_mem_mapping();
++      x86_init.hyper.init_mem_mapping();
+ 
+       early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
+ }
+diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
+index de503c225ae1..7b1622089f96 100644
+--- a/arch/x86/xen/enlighten_hvm.c
++++ b/arch/x86/xen/enlighten_hvm.c
+@@ -229,9 +229,9 @@ static uint32_t __init xen_platform_hvm(void)
+ const struct hypervisor_x86 x86_hyper_xen_hvm = {
+       .name                   = "Xen HVM",
+       .detect                 = xen_platform_hvm,
+-      .init_platform          = xen_hvm_guest_init,
+-      .pin_vcpu               = xen_pin_vcpu,
+-      .x2apic_available       = xen_x2apic_para_available,
+-      .init_mem_mapping       = xen_hvm_init_mem_mapping,
++      .init.init_platform     = xen_hvm_guest_init,
++      .init.x2apic_available  = xen_x2apic_para_available,
++      .init.init_mem_mapping  = xen_hvm_init_mem_mapping,
++      .runtime.pin_vcpu       = xen_pin_vcpu,
+ };
+ EXPORT_SYMBOL(x86_hyper_xen_hvm);
+diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
+index e7b213047724..4110fc9e5ee9 100644
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -1461,6 +1461,6 @@ static uint32_t __init xen_platform_pv(void)
+ const struct hypervisor_x86 x86_hyper_xen_pv = {
+       .name                   = "Xen PV",
+       .detect                 = xen_platform_pv,
+-      .pin_vcpu               = xen_pin_vcpu,
++      .runtime.pin_vcpu       = xen_pin_vcpu,
+ };
+ EXPORT_SYMBOL(x86_hyper_xen_pv);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0122-drivers-misc-intel-pti-Rename-the-header-file-to-fre.patch b/patches/kernel/0122-drivers-misc-intel-pti-Rename-the-header-file-to-fre.patch

deleted file mode 100644 (file)

index 5274bdb..0000000
--- a/patches/kernel/0122-drivers-misc-intel-pti-Rename-the-header-file-to-fre.patch
+++ /dev/null
@@ -1,73 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Ingo Molnar <mingo@kernel.org>
-Date: Tue, 5 Dec 2017 14:14:47 +0100
-Subject: [PATCH] drivers/misc/intel/pti: Rename the header file to free up the
- namespace
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-We'd like to use the 'PTI' acronym for 'Page Table Isolation' - free up the
-namespace by renaming the <linux/pti.h> driver header to <linux/intel-pti.h>.
-
-(Also standardize the header guard name while at it.)
-
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: J Freyensee <james_p_freyensee@linux.intel.com>
-Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Cc: linux-kernel@vger.kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 1784f9144b143a1e8b19fe94083b040aa559182b)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit fc05996d0900765640d56179acd2f5d052ad33e2)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- include/linux/{pti.h => intel-pti.h} | 6 +++---
- drivers/misc/pti.c                   | 2 +-
- 2 files changed, 4 insertions(+), 4 deletions(-)
- rename include/linux/{pti.h => intel-pti.h} (94%)
-
-diff --git a/include/linux/pti.h b/include/linux/intel-pti.h
-similarity index 94%
-rename from include/linux/pti.h
-rename to include/linux/intel-pti.h
-index b3ea01a3197e..2710d72de3c9 100644
---- a/include/linux/pti.h
-+++ b/include/linux/intel-pti.h
-@@ -22,8 +22,8 @@
-  * interface to write out it's contents for debugging a mobile system.
-  */
- 
--#ifndef PTI_H_
--#define PTI_H_
-+#ifndef LINUX_INTEL_PTI_H_
-+#define LINUX_INTEL_PTI_H_
- 
- /* offset for last dword of any PTI message. Part of MIPI P1149.7 */
- #define PTI_LASTDWORD_DTS     0x30
-@@ -40,4 +40,4 @@ struct pti_masterchannel *pti_request_masterchannel(u8 type,
-                                                   const char *thread_name);
- void pti_release_masterchannel(struct pti_masterchannel *mc);
- 
--#endif /*PTI_H_*/
-+#endif /* LINUX_INTEL_PTI_H_ */
-diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c
-index eda38cbe8530..41f2a9f6851d 100644
---- a/drivers/misc/pti.c
-+++ b/drivers/misc/pti.c
-@@ -32,7 +32,7 @@
- #include <linux/pci.h>
- #include <linux/mutex.h>
- #include <linux/miscdevice.h>
--#include <linux/pti.h>
-+#include <linux/intel-pti.h>
- #include <linux/slab.h>
- #include <linux/uaccess.h>
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0122-x86-virt-Add-enum-for-hypervisors-to-replace-x86_hyp.patch b/patches/kernel/0122-x86-virt-Add-enum-for-hypervisors-to-replace-x86_hyp.patch

new file mode 100644 (file)

index 0000000..02762ba
--- /dev/null
+++ b/patches/kernel/0122-x86-virt-Add-enum-for-hypervisors-to-replace-x86_hyp.patch
@@ -0,0 +1,301 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 9 Nov 2017 14:27:36 +0100
+Subject: [PATCH] x86/virt: Add enum for hypervisors to replace x86_hyper
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The x86_hyper pointer is only used for checking whether a virtual
+device is supporting the hypervisor the system is running on.
+
+Use an enum for that purpose instead and drop the x86_hyper pointer.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Xavier Deguillard <xdeguillard@vmware.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: akataria@vmware.com
+Cc: arnd@arndb.de
+Cc: boris.ostrovsky@oracle.com
+Cc: devel@linuxdriverproject.org
+Cc: dmitry.torokhov@gmail.com
+Cc: gregkh@linuxfoundation.org
+Cc: haiyangz@microsoft.com
+Cc: kvm@vger.kernel.org
+Cc: kys@microsoft.com
+Cc: linux-graphics-maintainer@vmware.com
+Cc: linux-input@vger.kernel.org
+Cc: moltmann@vmware.com
+Cc: pbonzini@redhat.com
+Cc: pv-drivers@vmware.com
+Cc: rkrcmar@redhat.com
+Cc: sthemmin@microsoft.com
+Cc: virtualization@lists.linux-foundation.org
+Cc: xen-devel@lists.xenproject.org
+Link: http://lkml.kernel.org/r/20171109132739.23465-3-jgross@suse.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit 03b2a320b19f1424e9ac9c21696be9c60b6d0d93)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit c24b0a226fadfe1abe78fa568ff84fea6ecd7ca5)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/hypervisor.h | 23 ++++++++++++++---------
+ arch/x86/hyperv/hv_init.c         |  2 +-
+ arch/x86/kernel/cpu/hypervisor.c  | 12 +++++++++---
+ arch/x86/kernel/cpu/mshyperv.c    |  6 +++---
+ arch/x86/kernel/cpu/vmware.c      |  4 ++--
+ arch/x86/kernel/kvm.c             |  4 ++--
+ arch/x86/xen/enlighten_hvm.c      |  4 ++--
+ arch/x86/xen/enlighten_pv.c       |  4 ++--
+ drivers/hv/vmbus_drv.c            |  2 +-
+ drivers/input/mouse/vmmouse.c     | 10 ++++------
+ drivers/misc/vmw_balloon.c        |  2 +-
+ 11 files changed, 41 insertions(+), 32 deletions(-)
+
+diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
+index 0eca7239a7aa..1b0a5abcd8ae 100644
+--- a/arch/x86/include/asm/hypervisor.h
++++ b/arch/x86/include/asm/hypervisor.h
+@@ -29,6 +29,16 @@
+ /*
+  * x86 hypervisor information
+  */
++
++enum x86_hypervisor_type {
++      X86_HYPER_NATIVE = 0,
++      X86_HYPER_VMWARE,
++      X86_HYPER_MS_HYPERV,
++      X86_HYPER_XEN_PV,
++      X86_HYPER_XEN_HVM,
++      X86_HYPER_KVM,
++};
++
+ struct hypervisor_x86 {
+       /* Hypervisor name */
+       const char      *name;
+@@ -36,6 +46,9 @@ struct hypervisor_x86 {
+       /* Detection routine */
+       uint32_t        (*detect)(void);
+ 
++      /* Hypervisor type */
++      enum x86_hypervisor_type type;
++
+       /* init time callbacks */
+       struct x86_hyper_init init;
+ 
+@@ -43,15 +56,7 @@ struct hypervisor_x86 {
+       struct x86_hyper_runtime runtime;
+ };
+ 
+-extern const struct hypervisor_x86 *x86_hyper;
+-
+-/* Recognized hypervisors */
+-extern const struct hypervisor_x86 x86_hyper_vmware;
+-extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
+-extern const struct hypervisor_x86 x86_hyper_xen_pv;
+-extern const struct hypervisor_x86 x86_hyper_xen_hvm;
+-extern const struct hypervisor_x86 x86_hyper_kvm;
+-
++extern enum x86_hypervisor_type x86_hyper_type;
+ extern void init_hypervisor_platform(void);
+ #else
+ static inline void init_hypervisor_platform(void) { }
+diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
+index ec7c9661743f..32fa894139d5 100644
+--- a/arch/x86/hyperv/hv_init.c
++++ b/arch/x86/hyperv/hv_init.c
+@@ -99,7 +99,7 @@ void hyperv_init(void)
+       u64 guest_id;
+       union hv_x64_msr_hypercall_contents hypercall_msr;
+ 
+-      if (x86_hyper != &x86_hyper_ms_hyperv)
++      if (x86_hyper_type != X86_HYPER_MS_HYPERV)
+               return;
+ 
+       /*
+diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
+index 22226c1bf092..bea8d3e24f50 100644
+--- a/arch/x86/kernel/cpu/hypervisor.c
++++ b/arch/x86/kernel/cpu/hypervisor.c
+@@ -26,6 +26,12 @@
+ #include <asm/processor.h>
+ #include <asm/hypervisor.h>
+ 
++extern const struct hypervisor_x86 x86_hyper_vmware;
++extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
++extern const struct hypervisor_x86 x86_hyper_xen_pv;
++extern const struct hypervisor_x86 x86_hyper_xen_hvm;
++extern const struct hypervisor_x86 x86_hyper_kvm;
++
+ static const __initconst struct hypervisor_x86 * const hypervisors[] =
+ {
+ #ifdef CONFIG_XEN_PV
+@@ -41,8 +47,8 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
+ #endif
+ };
+ 
+-const struct hypervisor_x86 *x86_hyper;
+-EXPORT_SYMBOL(x86_hyper);
++enum x86_hypervisor_type x86_hyper_type;
++EXPORT_SYMBOL(x86_hyper_type);
+ 
+ static inline const struct hypervisor_x86 * __init
+ detect_hypervisor_vendor(void)
+@@ -87,6 +93,6 @@ void __init init_hypervisor_platform(void)
+       copy_array(&h->init, &x86_init.hyper, sizeof(h->init));
+       copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime));
+ 
+-      x86_hyper = h;
++      x86_hyper_type = h->type;
+       x86_init.hyper.init_platform();
+ }
+diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
+index 9707e431da27..3672f1192119 100644
+--- a/arch/x86/kernel/cpu/mshyperv.c
++++ b/arch/x86/kernel/cpu/mshyperv.c
+@@ -252,9 +252,9 @@ static void __init ms_hyperv_init_platform(void)
+ #endif
+ }
+ 
+-const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
+-      .name                   = "Microsoft HyperV",
++const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
++      .name                   = "Microsoft Hyper-V",
+       .detect                 = ms_hyperv_platform,
++      .type                   = X86_HYPER_MS_HYPERV,
+       .init.init_platform     = ms_hyperv_init_platform,
+ };
+-EXPORT_SYMBOL(x86_hyper_ms_hyperv);
+diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
+index 4804c1d063c8..8e005329648b 100644
+--- a/arch/x86/kernel/cpu/vmware.c
++++ b/arch/x86/kernel/cpu/vmware.c
+@@ -205,10 +205,10 @@ static bool __init vmware_legacy_x2apic_available(void)
+              (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
+ }
+ 
+-const __refconst struct hypervisor_x86 x86_hyper_vmware = {
++const __initconst struct hypervisor_x86 x86_hyper_vmware = {
+       .name                   = "VMware",
+       .detect                 = vmware_platform,
++      .type                   = X86_HYPER_VMWARE,
+       .init.init_platform     = vmware_platform_setup,
+       .init.x2apic_available  = vmware_legacy_x2apic_available,
+ };
+-EXPORT_SYMBOL(x86_hyper_vmware);
+diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
+index 54e373bfeab9..b65a51a24647 100644
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -544,12 +544,12 @@ static uint32_t __init kvm_detect(void)
+       return kvm_cpuid_base();
+ }
+ 
+-const struct hypervisor_x86 x86_hyper_kvm __refconst = {
++const __initconst struct hypervisor_x86 x86_hyper_kvm = {
+       .name                   = "KVM",
+       .detect                 = kvm_detect,
++      .type                   = X86_HYPER_KVM,
+       .init.x2apic_available  = kvm_para_available,
+ };
+-EXPORT_SYMBOL_GPL(x86_hyper_kvm);
+ 
+ static __init int activate_jump_labels(void)
+ {
+diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
+index 7b1622089f96..754d5391d9fa 100644
+--- a/arch/x86/xen/enlighten_hvm.c
++++ b/arch/x86/xen/enlighten_hvm.c
+@@ -226,12 +226,12 @@ static uint32_t __init xen_platform_hvm(void)
+       return xen_cpuid_base();
+ }
+ 
+-const struct hypervisor_x86 x86_hyper_xen_hvm = {
++const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = {
+       .name                   = "Xen HVM",
+       .detect                 = xen_platform_hvm,
++      .type                   = X86_HYPER_XEN_HVM,
+       .init.init_platform     = xen_hvm_guest_init,
+       .init.x2apic_available  = xen_x2apic_para_available,
+       .init.init_mem_mapping  = xen_hvm_init_mem_mapping,
+       .runtime.pin_vcpu       = xen_pin_vcpu,
+ };
+-EXPORT_SYMBOL(x86_hyper_xen_hvm);
+diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
+index 4110fc9e5ee9..63c81154083b 100644
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -1458,9 +1458,9 @@ static uint32_t __init xen_platform_pv(void)
+       return 0;
+ }
+ 
+-const struct hypervisor_x86 x86_hyper_xen_pv = {
++const __initconst struct hypervisor_x86 x86_hyper_xen_pv = {
+       .name                   = "Xen PV",
+       .detect                 = xen_platform_pv,
++      .type                   = X86_HYPER_XEN_PV,
+       .runtime.pin_vcpu       = xen_pin_vcpu,
+ };
+-EXPORT_SYMBOL(x86_hyper_xen_pv);
+diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
+index 5ad627044dd1..8aca7613e482 100644
+--- a/drivers/hv/vmbus_drv.c
++++ b/drivers/hv/vmbus_drv.c
+@@ -1551,7 +1551,7 @@ static int __init hv_acpi_init(void)
+ {
+       int ret, t;
+ 
+-      if (x86_hyper != &x86_hyper_ms_hyperv)
++      if (x86_hyper_type != X86_HYPER_MS_HYPERV)
+               return -ENODEV;
+ 
+       init_completion(&probe_event);
+diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c
+index 0f586780ceb4..1ae5c1ef3f5b 100644
+--- a/drivers/input/mouse/vmmouse.c
++++ b/drivers/input/mouse/vmmouse.c
+@@ -316,11 +316,9 @@ static int vmmouse_enable(struct psmouse *psmouse)
+ /*
+  * Array of supported hypervisors.
+  */
+-static const struct hypervisor_x86 *vmmouse_supported_hypervisors[] = {
+-      &x86_hyper_vmware,
+-#ifdef CONFIG_KVM_GUEST
+-      &x86_hyper_kvm,
+-#endif
++static enum x86_hypervisor_type vmmouse_supported_hypervisors[] = {
++      X86_HYPER_VMWARE,
++      X86_HYPER_KVM,
+ };
+ 
+ /**
+@@ -331,7 +329,7 @@ static bool vmmouse_check_hypervisor(void)
+       int i;
+ 
+       for (i = 0; i < ARRAY_SIZE(vmmouse_supported_hypervisors); i++)
+-              if (vmmouse_supported_hypervisors[i] == x86_hyper)
++              if (vmmouse_supported_hypervisors[i] == x86_hyper_type)
+                       return true;
+ 
+       return false;
+diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
+index 1e688bfec567..9047c0a529b2 100644
+--- a/drivers/misc/vmw_balloon.c
++++ b/drivers/misc/vmw_balloon.c
+@@ -1271,7 +1271,7 @@ static int __init vmballoon_init(void)
+        * Check if we are running on VMware's hypervisor and bail out
+        * if we are not.
+        */
+-      if (x86_hyper != &x86_hyper_vmware)
++      if (x86_hyper_type != X86_HYPER_VMWARE)
+               return -ENODEV;
+ 
+       for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0123-drivers-misc-intel-pti-Rename-the-header-file-to-fre.patch b/patches/kernel/0123-drivers-misc-intel-pti-Rename-the-header-file-to-fre.patch

new file mode 100644 (file)

index 0000000..5274bdb
--- /dev/null
+++ b/patches/kernel/0123-drivers-misc-intel-pti-Rename-the-header-file-to-fre.patch
@@ -0,0 +1,73 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Ingo Molnar <mingo@kernel.org>
+Date: Tue, 5 Dec 2017 14:14:47 +0100
+Subject: [PATCH] drivers/misc/intel/pti: Rename the header file to free up the
+ namespace
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+We'd like to use the 'PTI' acronym for 'Page Table Isolation' - free up the
+namespace by renaming the <linux/pti.h> driver header to <linux/intel-pti.h>.
+
+(Also standardize the header guard name while at it.)
+
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: J Freyensee <james_p_freyensee@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 1784f9144b143a1e8b19fe94083b040aa559182b)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit fc05996d0900765640d56179acd2f5d052ad33e2)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ include/linux/{pti.h => intel-pti.h} | 6 +++---
+ drivers/misc/pti.c                   | 2 +-
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+ rename include/linux/{pti.h => intel-pti.h} (94%)
+
+diff --git a/include/linux/pti.h b/include/linux/intel-pti.h
+similarity index 94%
+rename from include/linux/pti.h
+rename to include/linux/intel-pti.h
+index b3ea01a3197e..2710d72de3c9 100644
+--- a/include/linux/pti.h
++++ b/include/linux/intel-pti.h
+@@ -22,8 +22,8 @@
+  * interface to write out it's contents for debugging a mobile system.
+  */
+ 
+-#ifndef PTI_H_
+-#define PTI_H_
++#ifndef LINUX_INTEL_PTI_H_
++#define LINUX_INTEL_PTI_H_
+ 
+ /* offset for last dword of any PTI message. Part of MIPI P1149.7 */
+ #define PTI_LASTDWORD_DTS     0x30
+@@ -40,4 +40,4 @@ struct pti_masterchannel *pti_request_masterchannel(u8 type,
+                                                   const char *thread_name);
+ void pti_release_masterchannel(struct pti_masterchannel *mc);
+ 
+-#endif /*PTI_H_*/
++#endif /* LINUX_INTEL_PTI_H_ */
+diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c
+index eda38cbe8530..41f2a9f6851d 100644
+--- a/drivers/misc/pti.c
++++ b/drivers/misc/pti.c
+@@ -32,7 +32,7 @@
+ #include <linux/pci.h>
+ #include <linux/mutex.h>
+ #include <linux/miscdevice.h>
+-#include <linux/pti.h>
++#include <linux/intel-pti.h>
+ #include <linux/slab.h>
+ #include <linux/uaccess.h>
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0123-x86-cpufeature-Add-User-Mode-Instruction-Prevention-.patch b/patches/kernel/0123-x86-cpufeature-Add-User-Mode-Instruction-Prevention-.patch

deleted file mode 100644 (file)

index 1a0d9a2..0000000
--- a/patches/kernel/0123-x86-cpufeature-Add-User-Mode-Instruction-Prevention-.patch
+++ /dev/null
@@ -1,88 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
-Date: Sun, 5 Nov 2017 18:27:51 -0800
-Subject: [PATCH] x86/cpufeature: Add User-Mode Instruction Prevention
- definitions
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-[ Note, this is a Git cherry-pick of the following commit: (limited to the cpufeatures.h file)
-
-    3522c2a6a4f3 ("x86/cpufeature: Add User-Mode Instruction Prevention definitions")
-
-  ... for easier x86 PTI code testing and back-porting. ]
-
-User-Mode Instruction Prevention is a security feature present in new
-Intel processors that, when set, prevents the execution of a subset of
-instructions if such instructions are executed in user mode (CPL > 0).
-Attempting to execute such instructions causes a general protection
-exception.
-
-The subset of instructions comprises:
-
- * SGDT - Store Global Descriptor Table
- * SIDT - Store Interrupt Descriptor Table
- * SLDT - Store Local Descriptor Table
- * SMSW - Store Machine Status Word
- * STR  - Store Task Register
-
-This feature is also added to the list of disabled-features to allow
-a cleaner handling of build-time configuration.
-
-Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Chen Yucong <slaoub@gmail.com>
-Cc: Chris Metcalf <cmetcalf@mellanox.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Fenghua Yu <fenghua.yu@intel.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Huang Rui <ray.huang@amd.com>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Jonathan Corbet <corbet@lwn.net>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Masami Hiramatsu <mhiramat@kernel.org>
-Cc: Michael S. Tsirkin <mst@redhat.com>
-Cc: Paolo Bonzini <pbonzini@redhat.com>
-Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Ravi V. Shankar <ravi.v.shankar@intel.com>
-Cc: Shuah Khan <shuah@kernel.org>
-Cc: Tony Luck <tony.luck@intel.com>
-Cc: Vlastimil Babka <vbabka@suse.cz>
-Cc: ricardo.neri@intel.com
-Link: http://lkml.kernel.org/r/1509935277-22138-7-git-send-email-ricardo.neri-calderon@linux.intel.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit a8b4db562e7283a1520f9e9730297ecaab7622ea)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 6193ddb9de38665ba45f7f17dd9713baec3673ca)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/cpufeatures.h | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
-index 6db782ed9cdb..0ea630bb3e74 100644
---- a/arch/x86/include/asm/cpufeatures.h
-+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -295,6 +295,7 @@
- 
- /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
- #define X86_FEATURE_AVX512VBMI                (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
-+#define X86_FEATURE_UMIP              (16*32+ 2) /* User Mode Instruction Protection */
- #define X86_FEATURE_PKU                       (16*32+ 3) /* Protection Keys for Userspace */
- #define X86_FEATURE_OSPKE             (16*32+ 4) /* OS Protection Keys Enable */
- #define X86_FEATURE_AVX512_VBMI2      (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
--- 
-2.14.2
-
diff --git a/patches/kernel/0124-x86-Make-X86_BUG_FXSAVE_LEAK-detectable-in-CPUID-on-.patch b/patches/kernel/0124-x86-Make-X86_BUG_FXSAVE_LEAK-detectable-in-CPUID-on-.patch

deleted file mode 100644 (file)

index 342f3ef..0000000
--- a/patches/kernel/0124-x86-Make-X86_BUG_FXSAVE_LEAK-detectable-in-CPUID-on-.patch
+++ /dev/null
@@ -1,73 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Rudolf Marek <r.marek@assembler.cz>
-Date: Tue, 28 Nov 2017 22:01:06 +0100
-Subject: [PATCH] x86: Make X86_BUG_FXSAVE_LEAK detectable in CPUID on AMD
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-[ Note, this is a Git cherry-pick of the following commit:
-
-    2b67799bdf25 ("x86: Make X86_BUG_FXSAVE_LEAK detectable in CPUID on AMD")
-
-  ... for easier x86 PTI code testing and back-porting. ]
-
-The latest AMD AMD64 Architecture Programmer's Manual
-adds a CPUID feature XSaveErPtr (CPUID_Fn80000008_EBX[2]).
-
-If this feature is set, the FXSAVE, XSAVE, FXSAVEOPT, XSAVEC, XSAVES
-/ FXRSTOR, XRSTOR, XRSTORS always save/restore error pointers,
-thus making the X86_BUG_FXSAVE_LEAK workaround obsolete on such CPUs.
-
-Signed-Off-By: Rudolf Marek <r.marek@assembler.cz>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Tested-by: Borislav Petkov <bp@suse.de>
-Cc: Andy Lutomirski <luto@amacapital.net>
-Link: https://lkml.kernel.org/r/bdcebe90-62c5-1f05-083c-eba7f08b2540@assembler.cz
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit f2dbad36c55e5d3a91dccbde6e8cae345fe5632f)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 281b622113c66ba2de9b7725e1d232ea3c282114)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/cpufeatures.h | 1 +
- arch/x86/kernel/cpu/amd.c          | 7 +++++--
- 2 files changed, 6 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
-index 0ea630bb3e74..d57a174ec97c 100644
---- a/arch/x86/include/asm/cpufeatures.h
-+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -265,6 +265,7 @@
- /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
- #define X86_FEATURE_CLZERO            (13*32+ 0) /* CLZERO instruction */
- #define X86_FEATURE_IRPERF            (13*32+ 1) /* Instructions Retired Count */
-+#define X86_FEATURE_XSAVEERPTR                (13*32+ 2) /* Always save/restore FP error pointers */
- 
- /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
- #define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
-diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
-index 3b9e220621f8..2a5328cc03a6 100644
---- a/arch/x86/kernel/cpu/amd.c
-+++ b/arch/x86/kernel/cpu/amd.c
-@@ -760,8 +760,11 @@ static void init_amd(struct cpuinfo_x86 *c)
-       case 0x15: init_amd_bd(c); break;
-       }
- 
--      /* Enable workaround for FXSAVE leak */
--      if (c->x86 >= 6)
-+      /*
-+       * Enable workaround for FXSAVE leak on CPUs
-+       * without a XSaveErPtr feature
-+       */
-+      if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
-               set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
- 
-       cpu_detect_cache_sizes(c);
--- 
-2.14.2
-
diff --git a/patches/kernel/0124-x86-cpufeature-Add-User-Mode-Instruction-Prevention-.patch b/patches/kernel/0124-x86-cpufeature-Add-User-Mode-Instruction-Prevention-.patch

new file mode 100644 (file)

index 0000000..1a0d9a2
--- /dev/null
+++ b/patches/kernel/0124-x86-cpufeature-Add-User-Mode-Instruction-Prevention-.patch
@@ -0,0 +1,88 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Sun, 5 Nov 2017 18:27:51 -0800
+Subject: [PATCH] x86/cpufeature: Add User-Mode Instruction Prevention
+ definitions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+[ Note, this is a Git cherry-pick of the following commit: (limited to the cpufeatures.h file)
+
+    3522c2a6a4f3 ("x86/cpufeature: Add User-Mode Instruction Prevention definitions")
+
+  ... for easier x86 PTI code testing and back-porting. ]
+
+User-Mode Instruction Prevention is a security feature present in new
+Intel processors that, when set, prevents the execution of a subset of
+instructions if such instructions are executed in user mode (CPL > 0).
+Attempting to execute such instructions causes a general protection
+exception.
+
+The subset of instructions comprises:
+
+ * SGDT - Store Global Descriptor Table
+ * SIDT - Store Interrupt Descriptor Table
+ * SLDT - Store Local Descriptor Table
+ * SMSW - Store Machine Status Word
+ * STR  - Store Task Register
+
+This feature is also added to the list of disabled-features to allow
+a cleaner handling of build-time configuration.
+
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Chen Yucong <slaoub@gmail.com>
+Cc: Chris Metcalf <cmetcalf@mellanox.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Huang Rui <ray.huang@amd.com>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ravi V. Shankar <ravi.v.shankar@intel.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: ricardo.neri@intel.com
+Link: http://lkml.kernel.org/r/1509935277-22138-7-git-send-email-ricardo.neri-calderon@linux.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit a8b4db562e7283a1520f9e9730297ecaab7622ea)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 6193ddb9de38665ba45f7f17dd9713baec3673ca)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/cpufeatures.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 6db782ed9cdb..0ea630bb3e74 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -295,6 +295,7 @@
+ 
+ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
+ #define X86_FEATURE_AVX512VBMI                (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
++#define X86_FEATURE_UMIP              (16*32+ 2) /* User Mode Instruction Protection */
+ #define X86_FEATURE_PKU                       (16*32+ 3) /* Protection Keys for Userspace */
+ #define X86_FEATURE_OSPKE             (16*32+ 4) /* OS Protection Keys Enable */
+ #define X86_FEATURE_AVX512_VBMI2      (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0125-perf-x86-Enable-free-running-PEBS-for-REGS_USER-INTR.patch b/patches/kernel/0125-perf-x86-Enable-free-running-PEBS-for-REGS_USER-INTR.patch

deleted file mode 100644 (file)

index 169282e..0000000
--- a/patches/kernel/0125-perf-x86-Enable-free-running-PEBS-for-REGS_USER-INTR.patch
+++ /dev/null
@@ -1,109 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andi Kleen <ak@linux.intel.com>
-Date: Thu, 31 Aug 2017 14:46:30 -0700
-Subject: [PATCH] perf/x86: Enable free running PEBS for REGS_USER/INTR
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-[ Note, this is a Git cherry-pick of the following commit:
-
-    a47ba4d77e12 ("perf/x86: Enable free running PEBS for REGS_USER/INTR")
-
-  ... for easier x86 PTI code testing and back-porting. ]
-
-Currently free running PEBS is disabled when user or interrupt
-registers are requested. Most of the registers are actually
-available in the PEBS record and can be supported.
-
-So we just need to check for the supported registers and then
-allow it: it is all except for the segment register.
-
-For user registers this only works when the counter is limited
-to ring 3 only, so this also needs to be checked.
-
-Signed-off-by: Andi Kleen <ak@linux.intel.com>
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/20170831214630.21892-1-andi@firstfloor.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit 2fe1bc1f501d55e5925b4035bcd85781adc76c63)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 06c6715f5b78b9976e72467b6bba510e243e5aad)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/events/perf_event.h | 24 +++++++++++++++++++++++-
- arch/x86/events/intel/core.c |  4 ++++
- 2 files changed, 27 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
-index 0f7dad8bd358..590eaf7c2c3e 100644
---- a/arch/x86/events/perf_event.h
-+++ b/arch/x86/events/perf_event.h
-@@ -85,13 +85,15 @@ struct amd_nb {
-  * Flags PEBS can handle without an PMI.
-  *
-  * TID can only be handled by flushing at context switch.
-+ * REGS_USER can be handled for events limited to ring 3.
-  *
-  */
- #define PEBS_FREERUNNING_FLAGS \
-       (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
-       PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
-       PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
--      PERF_SAMPLE_TRANSACTION)
-+      PERF_SAMPLE_TRANSACTION | \
-+      PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
- 
- /*
-  * A debug store configuration.
-@@ -110,6 +112,26 @@ struct debug_store {
-       u64     pebs_event_reset[MAX_PEBS_EVENTS];
- };
- 
-+#define PEBS_REGS \
-+      (PERF_REG_X86_AX | \
-+       PERF_REG_X86_BX | \
-+       PERF_REG_X86_CX | \
-+       PERF_REG_X86_DX | \
-+       PERF_REG_X86_DI | \
-+       PERF_REG_X86_SI | \
-+       PERF_REG_X86_SP | \
-+       PERF_REG_X86_BP | \
-+       PERF_REG_X86_IP | \
-+       PERF_REG_X86_FLAGS | \
-+       PERF_REG_X86_R8 | \
-+       PERF_REG_X86_R9 | \
-+       PERF_REG_X86_R10 | \
-+       PERF_REG_X86_R11 | \
-+       PERF_REG_X86_R12 | \
-+       PERF_REG_X86_R13 | \
-+       PERF_REG_X86_R14 | \
-+       PERF_REG_X86_R15)
-+
- /*
-  * Per register state.
-  */
-diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
-index 6f342001ec6a..7f3afbf928bb 100644
---- a/arch/x86/events/intel/core.c
-+++ b/arch/x86/events/intel/core.c
-@@ -2958,6 +2958,10 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
- 
-       if (event->attr.use_clockid)
-               flags &= ~PERF_SAMPLE_TIME;
-+      if (!event->attr.exclude_kernel)
-+              flags &= ~PERF_SAMPLE_REGS_USER;
-+      if (event->attr.sample_regs_user & ~PEBS_REGS)
-+              flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
-       return flags;
- }
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0125-x86-Make-X86_BUG_FXSAVE_LEAK-detectable-in-CPUID-on-.patch b/patches/kernel/0125-x86-Make-X86_BUG_FXSAVE_LEAK-detectable-in-CPUID-on-.patch

new file mode 100644 (file)

index 0000000..342f3ef
--- /dev/null
+++ b/patches/kernel/0125-x86-Make-X86_BUG_FXSAVE_LEAK-detectable-in-CPUID-on-.patch
@@ -0,0 +1,73 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Rudolf Marek <r.marek@assembler.cz>
+Date: Tue, 28 Nov 2017 22:01:06 +0100
+Subject: [PATCH] x86: Make X86_BUG_FXSAVE_LEAK detectable in CPUID on AMD
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+[ Note, this is a Git cherry-pick of the following commit:
+
+    2b67799bdf25 ("x86: Make X86_BUG_FXSAVE_LEAK detectable in CPUID on AMD")
+
+  ... for easier x86 PTI code testing and back-porting. ]
+
+The latest AMD AMD64 Architecture Programmer's Manual
+adds a CPUID feature XSaveErPtr (CPUID_Fn80000008_EBX[2]).
+
+If this feature is set, the FXSAVE, XSAVE, FXSAVEOPT, XSAVEC, XSAVES
+/ FXRSTOR, XRSTOR, XRSTORS always save/restore error pointers,
+thus making the X86_BUG_FXSAVE_LEAK workaround obsolete on such CPUs.
+
+Signed-Off-By: Rudolf Marek <r.marek@assembler.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Tested-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Link: https://lkml.kernel.org/r/bdcebe90-62c5-1f05-083c-eba7f08b2540@assembler.cz
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit f2dbad36c55e5d3a91dccbde6e8cae345fe5632f)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 281b622113c66ba2de9b7725e1d232ea3c282114)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/cpufeatures.h | 1 +
+ arch/x86/kernel/cpu/amd.c          | 7 +++++--
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 0ea630bb3e74..d57a174ec97c 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -265,6 +265,7 @@
+ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
+ #define X86_FEATURE_CLZERO            (13*32+ 0) /* CLZERO instruction */
+ #define X86_FEATURE_IRPERF            (13*32+ 1) /* Instructions Retired Count */
++#define X86_FEATURE_XSAVEERPTR                (13*32+ 2) /* Always save/restore FP error pointers */
+ 
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+ #define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index 3b9e220621f8..2a5328cc03a6 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -760,8 +760,11 @@ static void init_amd(struct cpuinfo_x86 *c)
+       case 0x15: init_amd_bd(c); break;
+       }
+ 
+-      /* Enable workaround for FXSAVE leak */
+-      if (c->x86 >= 6)
++      /*
++       * Enable workaround for FXSAVE leak on CPUs
++       * without a XSaveErPtr feature
++       */
++      if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
+               set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
+ 
+       cpu_detect_cache_sizes(c);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0126-bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_ev.patch b/patches/kernel/0126-bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_ev.patch

deleted file mode 100644 (file)

index 95f4ce8..0000000
--- a/patches/kernel/0126-bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_ev.patch
+++ /dev/null
@@ -1,68 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Daniel Borkmann <daniel@iogearbox.net>
-Date: Tue, 12 Dec 2017 02:25:31 +0100
-Subject: [PATCH] bpf: fix build issues on um due to mising bpf_perf_event.h
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-[ Note, this is a Git cherry-pick of the following commit:
-
-    a23f06f06dbe ("bpf: fix build issues on um due to mising bpf_perf_event.h")
-
-  ... for easier x86 PTI code testing and back-porting. ]
-
-Since c895f6f703ad ("bpf: correct broken uapi for
-BPF_PROG_TYPE_PERF_EVENT program type") um (uml) won't build
-on i386 or x86_64:
-
-  [...]
-    CC      init/main.o
-  In file included from ../include/linux/perf_event.h:18:0,
-                   from ../include/linux/trace_events.h:10,
-                   from ../include/trace/syscall.h:7,
-                   from ../include/linux/syscalls.h:82,
-                   from ../init/main.c:20:
-  ../include/uapi/linux/bpf_perf_event.h:11:32: fatal error:
-  asm/bpf_perf_event.h: No such file or directory #include
-  <asm/bpf_perf_event.h>
-  [...]
-
-Lets add missing bpf_perf_event.h also to um arch. This seems
-to be the only one still missing.
-
-Fixes: c895f6f703ad ("bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type")
-Reported-by: Randy Dunlap <rdunlap@infradead.org>
-Suggested-by: Richard Weinberger <richard@sigma-star.at>
-Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-Tested-by: Randy Dunlap <rdunlap@infradead.org>
-Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
-Cc: Richard Weinberger <richard@sigma-star.at>
-Acked-by: Alexei Starovoitov <ast@kernel.org>
-Acked-by: Richard Weinberger <richard@nod.at>
-Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit ab95477e7cb35557ecfc837687007b646bab9a9f)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 1883b099261ebece3016b50fa403ffde90027a04)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/um/include/asm/Kbuild | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
-index 50a32c33d729..73c57f614c9e 100644
---- a/arch/um/include/asm/Kbuild
-+++ b/arch/um/include/asm/Kbuild
-@@ -1,4 +1,5 @@
- generic-y += barrier.h
-+generic-y += bpf_perf_event.h
- generic-y += bug.h
- generic-y += clkdev.h
- generic-y += current.h
--- 
-2.14.2
-
diff --git a/patches/kernel/0126-perf-x86-Enable-free-running-PEBS-for-REGS_USER-INTR.patch b/patches/kernel/0126-perf-x86-Enable-free-running-PEBS-for-REGS_USER-INTR.patch

new file mode 100644 (file)

index 0000000..169282e
--- /dev/null
+++ b/patches/kernel/0126-perf-x86-Enable-free-running-PEBS-for-REGS_USER-INTR.patch
@@ -0,0 +1,109 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andi Kleen <ak@linux.intel.com>
+Date: Thu, 31 Aug 2017 14:46:30 -0700
+Subject: [PATCH] perf/x86: Enable free running PEBS for REGS_USER/INTR
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+[ Note, this is a Git cherry-pick of the following commit:
+
+    a47ba4d77e12 ("perf/x86: Enable free running PEBS for REGS_USER/INTR")
+
+  ... for easier x86 PTI code testing and back-porting. ]
+
+Currently free running PEBS is disabled when user or interrupt
+registers are requested. Most of the registers are actually
+available in the PEBS record and can be supported.
+
+So we just need to check for the supported registers and then
+allow it: it is all except for the segment register.
+
+For user registers this only works when the counter is limited
+to ring 3 only, so this also needs to be checked.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20170831214630.21892-1-andi@firstfloor.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit 2fe1bc1f501d55e5925b4035bcd85781adc76c63)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 06c6715f5b78b9976e72467b6bba510e243e5aad)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/events/perf_event.h | 24 +++++++++++++++++++++++-
+ arch/x86/events/intel/core.c |  4 ++++
+ 2 files changed, 27 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
+index 0f7dad8bd358..590eaf7c2c3e 100644
+--- a/arch/x86/events/perf_event.h
++++ b/arch/x86/events/perf_event.h
+@@ -85,13 +85,15 @@ struct amd_nb {
+  * Flags PEBS can handle without an PMI.
+  *
+  * TID can only be handled by flushing at context switch.
++ * REGS_USER can be handled for events limited to ring 3.
+  *
+  */
+ #define PEBS_FREERUNNING_FLAGS \
+       (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
+       PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
+       PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
+-      PERF_SAMPLE_TRANSACTION)
++      PERF_SAMPLE_TRANSACTION | \
++      PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
+ 
+ /*
+  * A debug store configuration.
+@@ -110,6 +112,26 @@ struct debug_store {
+       u64     pebs_event_reset[MAX_PEBS_EVENTS];
+ };
+ 
++#define PEBS_REGS \
++      (PERF_REG_X86_AX | \
++       PERF_REG_X86_BX | \
++       PERF_REG_X86_CX | \
++       PERF_REG_X86_DX | \
++       PERF_REG_X86_DI | \
++       PERF_REG_X86_SI | \
++       PERF_REG_X86_SP | \
++       PERF_REG_X86_BP | \
++       PERF_REG_X86_IP | \
++       PERF_REG_X86_FLAGS | \
++       PERF_REG_X86_R8 | \
++       PERF_REG_X86_R9 | \
++       PERF_REG_X86_R10 | \
++       PERF_REG_X86_R11 | \
++       PERF_REG_X86_R12 | \
++       PERF_REG_X86_R13 | \
++       PERF_REG_X86_R14 | \
++       PERF_REG_X86_R15)
++
+ /*
+  * Per register state.
+  */
+diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
+index 6f342001ec6a..7f3afbf928bb 100644
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -2958,6 +2958,10 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
+ 
+       if (event->attr.use_clockid)
+               flags &= ~PERF_SAMPLE_TIME;
++      if (!event->attr.exclude_kernel)
++              flags &= ~PERF_SAMPLE_REGS_USER;
++      if (event->attr.sample_regs_user & ~PEBS_REGS)
++              flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
+       return flags;
+ }
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0127-bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_ev.patch b/patches/kernel/0127-bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_ev.patch

new file mode 100644 (file)

index 0000000..95f4ce8
--- /dev/null
+++ b/patches/kernel/0127-bpf-fix-build-issues-on-um-due-to-mising-bpf_perf_ev.patch
@@ -0,0 +1,68 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 12 Dec 2017 02:25:31 +0100
+Subject: [PATCH] bpf: fix build issues on um due to mising bpf_perf_event.h
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+[ Note, this is a Git cherry-pick of the following commit:
+
+    a23f06f06dbe ("bpf: fix build issues on um due to mising bpf_perf_event.h")
+
+  ... for easier x86 PTI code testing and back-porting. ]
+
+Since c895f6f703ad ("bpf: correct broken uapi for
+BPF_PROG_TYPE_PERF_EVENT program type") um (uml) won't build
+on i386 or x86_64:
+
+  [...]
+    CC      init/main.o
+  In file included from ../include/linux/perf_event.h:18:0,
+                   from ../include/linux/trace_events.h:10,
+                   from ../include/trace/syscall.h:7,
+                   from ../include/linux/syscalls.h:82,
+                   from ../init/main.c:20:
+  ../include/uapi/linux/bpf_perf_event.h:11:32: fatal error:
+  asm/bpf_perf_event.h: No such file or directory #include
+  <asm/bpf_perf_event.h>
+  [...]
+
+Lets add missing bpf_perf_event.h also to um arch. This seems
+to be the only one still missing.
+
+Fixes: c895f6f703ad ("bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type")
+Reported-by: Randy Dunlap <rdunlap@infradead.org>
+Suggested-by: Richard Weinberger <richard@sigma-star.at>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Tested-by: Randy Dunlap <rdunlap@infradead.org>
+Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+Cc: Richard Weinberger <richard@sigma-star.at>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit ab95477e7cb35557ecfc837687007b646bab9a9f)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 1883b099261ebece3016b50fa403ffde90027a04)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/um/include/asm/Kbuild | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
+index 50a32c33d729..73c57f614c9e 100644
+--- a/arch/um/include/asm/Kbuild
++++ b/arch/um/include/asm/Kbuild
+@@ -1,4 +1,5 @@
+ generic-y += barrier.h
++generic-y += bpf_perf_event.h
+ generic-y += bug.h
+ generic-y += clkdev.h
+ generic-y += current.h
+-- 
+2.14.2
+
diff --git a/patches/kernel/0127-locking-barriers-Add-implicit-smp_read_barrier_depen.patch b/patches/kernel/0127-locking-barriers-Add-implicit-smp_read_barrier_depen.patch

deleted file mode 100644 (file)

index 03874b6..0000000
--- a/patches/kernel/0127-locking-barriers-Add-implicit-smp_read_barrier_depen.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Will Deacon <will.deacon@arm.com>
-Date: Tue, 24 Oct 2017 11:22:47 +0100
-Subject: [PATCH] locking/barriers: Add implicit smp_read_barrier_depends() to
- READ_ONCE()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-[ Note, this is a Git cherry-pick of the following commit:
-
-    76ebbe78f739 ("locking/barriers: Add implicit smp_read_barrier_depends() to READ_ONCE()")
-
-  ... for easier x86 PTI code testing and back-porting. ]
-
-In preparation for the removal of lockless_dereference(), which is the
-same as READ_ONCE() on all architectures other than Alpha, add an
-implicit smp_read_barrier_depends() to READ_ONCE() so that it can be
-used to head dependency chains on all architectures.
-
-Signed-off-by: Will Deacon <will.deacon@arm.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/1508840570-22169-3-git-send-email-will.deacon@arm.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit c2bc66082e1048c7573d72e62f597bdc5ce13fea)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 6ef3d843f8f1a8b72ca83d4b1d457c2896278ccd)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- include/linux/compiler.h | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/include/linux/compiler.h b/include/linux/compiler.h
-index 043b60de041e..8af8814ebe7a 100644
---- a/include/linux/compiler.h
-+++ b/include/linux/compiler.h
-@@ -314,6 +314,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
-               __read_once_size(&(x), __u.__c, sizeof(x));             \
-       else                                                            \
-               __read_once_size_nocheck(&(x), __u.__c, sizeof(x));     \
-+      smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
-       __u.__val;                                                      \
- })
- #define READ_ONCE(x) __READ_ONCE(x, 1)
--- 
-2.14.2
-
diff --git a/patches/kernel/0128-locking-barriers-Add-implicit-smp_read_barrier_depen.patch b/patches/kernel/0128-locking-barriers-Add-implicit-smp_read_barrier_depen.patch

new file mode 100644 (file)

index 0000000..03874b6
--- /dev/null
+++ b/patches/kernel/0128-locking-barriers-Add-implicit-smp_read_barrier_depen.patch
@@ -0,0 +1,53 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Tue, 24 Oct 2017 11:22:47 +0100
+Subject: [PATCH] locking/barriers: Add implicit smp_read_barrier_depends() to
+ READ_ONCE()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+[ Note, this is a Git cherry-pick of the following commit:
+
+    76ebbe78f739 ("locking/barriers: Add implicit smp_read_barrier_depends() to READ_ONCE()")
+
+  ... for easier x86 PTI code testing and back-porting. ]
+
+In preparation for the removal of lockless_dereference(), which is the
+same as READ_ONCE() on all architectures other than Alpha, add an
+implicit smp_read_barrier_depends() to READ_ONCE() so that it can be
+used to head dependency chains on all architectures.
+
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1508840570-22169-3-git-send-email-will.deacon@arm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit c2bc66082e1048c7573d72e62f597bdc5ce13fea)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 6ef3d843f8f1a8b72ca83d4b1d457c2896278ccd)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ include/linux/compiler.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/include/linux/compiler.h b/include/linux/compiler.h
+index 043b60de041e..8af8814ebe7a 100644
+--- a/include/linux/compiler.h
++++ b/include/linux/compiler.h
+@@ -314,6 +314,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
+               __read_once_size(&(x), __u.__c, sizeof(x));             \
+       else                                                            \
+               __read_once_size_nocheck(&(x), __u.__c, sizeof(x));     \
++      smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
+       __u.__val;                                                      \
+ })
+ #define READ_ONCE(x) __READ_ONCE(x, 1)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0128-locking-barriers-Convert-users-of-lockless_dereferen.patch b/patches/kernel/0128-locking-barriers-Convert-users-of-lockless_dereferen.patch

deleted file mode 100644 (file)

index e519140..0000000
--- a/patches/kernel/0128-locking-barriers-Convert-users-of-lockless_dereferen.patch
+++ /dev/null
@@ -1,324 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Will Deacon <will.deacon@arm.com>
-Date: Tue, 24 Oct 2017 11:22:48 +0100
-Subject: [PATCH] locking/barriers: Convert users of lockless_dereference() to
- READ_ONCE()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-[ Note, this is a Git cherry-pick of the following commit:
-
-    506458efaf15 ("locking/barriers: Convert users of lockless_dereference() to READ_ONCE()")
-
-  ... for easier x86 PTI code testing and back-porting. ]
-
-READ_ONCE() now has an implicit smp_read_barrier_depends() call, so it
-can be used instead of lockless_dereference() without any change in
-semantics.
-
-Signed-off-by: Will Deacon <will.deacon@arm.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/1508840570-22169-4-git-send-email-will.deacon@arm.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 3382290ed2d5e275429cef510ab21889d3ccd164)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 7252704bfd83e951d00ec75526ed2bf64a7f6ee1)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/mmu_context.h |  4 ++--
- fs/overlayfs/ovl_entry.h           |  2 +-
- include/linux/rculist.h            |  4 ++--
- include/linux/rcupdate.h           |  4 ++--
- mm/slab.h                          |  2 +-
- arch/x86/events/core.c             |  2 +-
- arch/x86/kernel/ldt.c              |  2 +-
- drivers/md/dm-mpath.c              | 20 ++++++++++----------
- fs/dcache.c                        |  4 ++--
- fs/overlayfs/readdir.c             |  2 +-
- kernel/events/core.c               |  4 ++--
- kernel/seccomp.c                   |  2 +-
- kernel/task_work.c                 |  2 +-
- 13 files changed, 27 insertions(+), 27 deletions(-)
-
-diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
-index 3c856a15b98e..efc530642f7d 100644
---- a/arch/x86/include/asm/mmu_context.h
-+++ b/arch/x86/include/asm/mmu_context.h
-@@ -72,8 +72,8 @@ static inline void load_mm_ldt(struct mm_struct *mm)
- #ifdef CONFIG_MODIFY_LDT_SYSCALL
-       struct ldt_struct *ldt;
- 
--      /* lockless_dereference synchronizes with smp_store_release */
--      ldt = lockless_dereference(mm->context.ldt);
-+      /* READ_ONCE synchronizes with smp_store_release */
-+      ldt = READ_ONCE(mm->context.ldt);
- 
-       /*
-        * Any change to mm->context.ldt is followed by an IPI to all
-diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
-index 25d9b5adcd42..36b49bd09264 100644
---- a/fs/overlayfs/ovl_entry.h
-+++ b/fs/overlayfs/ovl_entry.h
-@@ -77,5 +77,5 @@ static inline struct ovl_inode *OVL_I(struct inode *inode)
- 
- static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi)
- {
--      return lockless_dereference(oi->__upperdentry);
-+      return READ_ONCE(oi->__upperdentry);
- }
-diff --git a/include/linux/rculist.h b/include/linux/rculist.h
-index b1fd8bf85fdc..3a2bb7d8ed4d 100644
---- a/include/linux/rculist.h
-+++ b/include/linux/rculist.h
-@@ -274,7 +274,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
-  * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
-  */
- #define list_entry_rcu(ptr, type, member) \
--      container_of(lockless_dereference(ptr), type, member)
-+      container_of(READ_ONCE(ptr), type, member)
- 
- /**
-  * Where are list_empty_rcu() and list_first_entry_rcu()?
-@@ -367,7 +367,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
-  * example is when items are added to the list, but never deleted.
-  */
- #define list_entry_lockless(ptr, type, member) \
--      container_of((typeof(ptr))lockless_dereference(ptr), type, member)
-+      container_of((typeof(ptr))READ_ONCE(ptr), type, member)
- 
- /**
-  * list_for_each_entry_lockless - iterate over rcu list of given type
-diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
-index f816fc72b51e..ae494eb7b401 100644
---- a/include/linux/rcupdate.h
-+++ b/include/linux/rcupdate.h
-@@ -341,7 +341,7 @@ static inline void rcu_preempt_sleep_check(void) { }
- #define __rcu_dereference_check(p, c, space) \
- ({ \
-       /* Dependency order vs. p above. */ \
--      typeof(*p) *________p1 = (typeof(*p) *__force)lockless_dereference(p); \
-+      typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \
-       RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \
-       rcu_dereference_sparse(p, space); \
-       ((typeof(*p) __force __kernel *)(________p1)); \
-@@ -355,7 +355,7 @@ static inline void rcu_preempt_sleep_check(void) { }
- #define rcu_dereference_raw(p) \
- ({ \
-       /* Dependency order vs. p above. */ \
--      typeof(p) ________p1 = lockless_dereference(p); \
-+      typeof(p) ________p1 = READ_ONCE(p); \
-       ((typeof(*p) __force __kernel *)(________p1)); \
- })
- 
-diff --git a/mm/slab.h b/mm/slab.h
-index 6885e1192ec5..494cccef822a 100644
---- a/mm/slab.h
-+++ b/mm/slab.h
-@@ -257,7 +257,7 @@ cache_from_memcg_idx(struct kmem_cache *s, int idx)
-        * memcg_caches issues a write barrier to match this (see
-        * memcg_create_kmem_cache()).
-        */
--      cachep = lockless_dereference(arr->entries[idx]);
-+      cachep = READ_ONCE(arr->entries[idx]);
-       rcu_read_unlock();
- 
-       return cachep;
-diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
-index 939050169d12..18685de61288 100644
---- a/arch/x86/events/core.c
-+++ b/arch/x86/events/core.c
-@@ -2336,7 +2336,7 @@ static unsigned long get_segment_base(unsigned int segment)
-               struct ldt_struct *ldt;
- 
-               /* IRQs are off, so this synchronizes with smp_store_release */
--              ldt = lockless_dereference(current->active_mm->context.ldt);
-+              ldt = READ_ONCE(current->active_mm->context.ldt);
-               if (!ldt || idx >= ldt->nr_entries)
-                       return 0;
- 
-diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
-index 0402d44deb4d..b8be2413cb74 100644
---- a/arch/x86/kernel/ldt.c
-+++ b/arch/x86/kernel/ldt.c
-@@ -102,7 +102,7 @@ static void finalize_ldt_struct(struct ldt_struct *ldt)
- static void install_ldt(struct mm_struct *current_mm,
-                       struct ldt_struct *ldt)
- {
--      /* Synchronizes with lockless_dereference in load_mm_ldt. */
-+      /* Synchronizes with READ_ONCE in load_mm_ldt. */
-       smp_store_release(&current_mm->context.ldt, ldt);
- 
-       /* Activate the LDT for all CPUs using current_mm. */
-diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
-index d24e4b05f5da..731b7ffc7e37 100644
---- a/drivers/md/dm-mpath.c
-+++ b/drivers/md/dm-mpath.c
-@@ -366,7 +366,7 @@ static struct pgpath *choose_path_in_pg(struct multipath *m,
- 
-       pgpath = path_to_pgpath(path);
- 
--      if (unlikely(lockless_dereference(m->current_pg) != pg)) {
-+      if (unlikely(READ_ONCE(m->current_pg) != pg)) {
-               /* Only update current_pgpath if pg changed */
-               spin_lock_irqsave(&m->lock, flags);
-               m->current_pgpath = pgpath;
-@@ -390,7 +390,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
-       }
- 
-       /* Were we instructed to switch PG? */
--      if (lockless_dereference(m->next_pg)) {
-+      if (READ_ONCE(m->next_pg)) {
-               spin_lock_irqsave(&m->lock, flags);
-               pg = m->next_pg;
-               if (!pg) {
-@@ -406,7 +406,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
- 
-       /* Don't change PG until it has no remaining paths */
- check_current_pg:
--      pg = lockless_dereference(m->current_pg);
-+      pg = READ_ONCE(m->current_pg);
-       if (pg) {
-               pgpath = choose_path_in_pg(m, pg, nr_bytes);
-               if (!IS_ERR_OR_NULL(pgpath))
-@@ -473,7 +473,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
-       struct request *clone;
- 
-       /* Do we need to select a new pgpath? */
--      pgpath = lockless_dereference(m->current_pgpath);
-+      pgpath = READ_ONCE(m->current_pgpath);
-       if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
-               pgpath = choose_pgpath(m, nr_bytes);
- 
-@@ -535,7 +535,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
-       bool queue_io;
- 
-       /* Do we need to select a new pgpath? */
--      pgpath = lockless_dereference(m->current_pgpath);
-+      pgpath = READ_ONCE(m->current_pgpath);
-       queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
-       if (!pgpath || !queue_io)
-               pgpath = choose_pgpath(m, nr_bytes);
-@@ -1799,7 +1799,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
-       struct pgpath *current_pgpath;
-       int r;
- 
--      current_pgpath = lockless_dereference(m->current_pgpath);
-+      current_pgpath = READ_ONCE(m->current_pgpath);
-       if (!current_pgpath)
-               current_pgpath = choose_pgpath(m, 0);
- 
-@@ -1821,7 +1821,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
-       }
- 
-       if (r == -ENOTCONN) {
--              if (!lockless_dereference(m->current_pg)) {
-+              if (!READ_ONCE(m->current_pg)) {
-                       /* Path status changed, redo selection */
-                       (void) choose_pgpath(m, 0);
-               }
-@@ -1890,9 +1890,9 @@ static int multipath_busy(struct dm_target *ti)
-               return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
- 
-       /* Guess which priority_group will be used at next mapping time */
--      pg = lockless_dereference(m->current_pg);
--      next_pg = lockless_dereference(m->next_pg);
--      if (unlikely(!lockless_dereference(m->current_pgpath) && next_pg))
-+      pg = READ_ONCE(m->current_pg);
-+      next_pg = READ_ONCE(m->next_pg);
-+      if (unlikely(!READ_ONCE(m->current_pgpath) && next_pg))
-               pg = next_pg;
- 
-       if (!pg) {
-diff --git a/fs/dcache.c b/fs/dcache.c
-index 3203470c59c2..ccc2bcdcfdfb 100644
---- a/fs/dcache.c
-+++ b/fs/dcache.c
-@@ -231,7 +231,7 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
- {
-       /*
-        * Be careful about RCU walk racing with rename:
--       * use 'lockless_dereference' to fetch the name pointer.
-+       * use 'READ_ONCE' to fetch the name pointer.
-        *
-        * NOTE! Even if a rename will mean that the length
-        * was not loaded atomically, we don't care. The
-@@ -245,7 +245,7 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
-        * early because the data cannot match (there can
-        * be no NUL in the ct/tcount data)
-        */
--      const unsigned char *cs = lockless_dereference(dentry->d_name.name);
-+      const unsigned char *cs = READ_ONCE(dentry->d_name.name);
- 
-       return dentry_string_cmp(cs, ct, tcount);
- }
-diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
-index 3ff960372cb9..7920a3f62c19 100644
---- a/fs/overlayfs/readdir.c
-+++ b/fs/overlayfs/readdir.c
-@@ -440,7 +440,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
-       if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
-               struct inode *inode = file_inode(file);
- 
--              realfile = lockless_dereference(od->upperfile);
-+              realfile = READ_ONCE(od->upperfile);
-               if (!realfile) {
-                       struct path upperpath;
- 
-diff --git a/kernel/events/core.c b/kernel/events/core.c
-index 5d4398d1fa19..9f51738bf32e 100644
---- a/kernel/events/core.c
-+++ b/kernel/events/core.c
-@@ -4221,7 +4221,7 @@ static void perf_remove_from_owner(struct perf_event *event)
-        * indeed free this event, otherwise we need to serialize on
-        * owner->perf_event_mutex.
-        */
--      owner = lockless_dereference(event->owner);
-+      owner = READ_ONCE(event->owner);
-       if (owner) {
-               /*
-                * Since delayed_put_task_struct() also drops the last
-@@ -4318,7 +4318,7 @@ int perf_event_release_kernel(struct perf_event *event)
-                * Cannot change, child events are not migrated, see the
-                * comment with perf_event_ctx_lock_nested().
-                */
--              ctx = lockless_dereference(child->ctx);
-+              ctx = READ_ONCE(child->ctx);
-               /*
-                * Since child_mutex nests inside ctx::mutex, we must jump
-                * through hoops. We start by grabbing a reference on the ctx.
-diff --git a/kernel/seccomp.c b/kernel/seccomp.c
-index 34aced9ff3ff..3fd2c4b23697 100644
---- a/kernel/seccomp.c
-+++ b/kernel/seccomp.c
-@@ -188,7 +188,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
-       u32 ret = SECCOMP_RET_ALLOW;
-       /* Make sure cross-thread synced filter points somewhere sane. */
-       struct seccomp_filter *f =
--                      lockless_dereference(current->seccomp.filter);
-+                      READ_ONCE(current->seccomp.filter);
- 
-       /* Ensure unexpected behavior doesn't result in failing open. */
-       if (unlikely(WARN_ON(f == NULL)))
-diff --git a/kernel/task_work.c b/kernel/task_work.c
-index e056d5429783..0371093a2331 100644
---- a/kernel/task_work.c
-+++ b/kernel/task_work.c
-@@ -67,7 +67,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func)
-        * we raced with task_work_run(), *pprev == NULL/exited.
-        */
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
--      while ((work = lockless_dereference(*pprev))) {
-+      while ((work = READ_ONCE(*pprev))) {
-               if (work->func != func)
-                       pprev = &work->next;
-               else if (cmpxchg(pprev, work, work->next) == work)
--- 
-2.14.2
-
diff --git a/patches/kernel/0129-locking-barriers-Convert-users-of-lockless_dereferen.patch b/patches/kernel/0129-locking-barriers-Convert-users-of-lockless_dereferen.patch

new file mode 100644 (file)

index 0000000..e519140
--- /dev/null
+++ b/patches/kernel/0129-locking-barriers-Convert-users-of-lockless_dereferen.patch
@@ -0,0 +1,324 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Tue, 24 Oct 2017 11:22:48 +0100
+Subject: [PATCH] locking/barriers: Convert users of lockless_dereference() to
+ READ_ONCE()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+[ Note, this is a Git cherry-pick of the following commit:
+
+    506458efaf15 ("locking/barriers: Convert users of lockless_dereference() to READ_ONCE()")
+
+  ... for easier x86 PTI code testing and back-porting. ]
+
+READ_ONCE() now has an implicit smp_read_barrier_depends() call, so it
+can be used instead of lockless_dereference() without any change in
+semantics.
+
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1508840570-22169-4-git-send-email-will.deacon@arm.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 3382290ed2d5e275429cef510ab21889d3ccd164)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 7252704bfd83e951d00ec75526ed2bf64a7f6ee1)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/mmu_context.h |  4 ++--
+ fs/overlayfs/ovl_entry.h           |  2 +-
+ include/linux/rculist.h            |  4 ++--
+ include/linux/rcupdate.h           |  4 ++--
+ mm/slab.h                          |  2 +-
+ arch/x86/events/core.c             |  2 +-
+ arch/x86/kernel/ldt.c              |  2 +-
+ drivers/md/dm-mpath.c              | 20 ++++++++++----------
+ fs/dcache.c                        |  4 ++--
+ fs/overlayfs/readdir.c             |  2 +-
+ kernel/events/core.c               |  4 ++--
+ kernel/seccomp.c                   |  2 +-
+ kernel/task_work.c                 |  2 +-
+ 13 files changed, 27 insertions(+), 27 deletions(-)
+
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index 3c856a15b98e..efc530642f7d 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -72,8 +72,8 @@ static inline void load_mm_ldt(struct mm_struct *mm)
+ #ifdef CONFIG_MODIFY_LDT_SYSCALL
+       struct ldt_struct *ldt;
+ 
+-      /* lockless_dereference synchronizes with smp_store_release */
+-      ldt = lockless_dereference(mm->context.ldt);
++      /* READ_ONCE synchronizes with smp_store_release */
++      ldt = READ_ONCE(mm->context.ldt);
+ 
+       /*
+        * Any change to mm->context.ldt is followed by an IPI to all
+diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
+index 25d9b5adcd42..36b49bd09264 100644
+--- a/fs/overlayfs/ovl_entry.h
++++ b/fs/overlayfs/ovl_entry.h
+@@ -77,5 +77,5 @@ static inline struct ovl_inode *OVL_I(struct inode *inode)
+ 
+ static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi)
+ {
+-      return lockless_dereference(oi->__upperdentry);
++      return READ_ONCE(oi->__upperdentry);
+ }
+diff --git a/include/linux/rculist.h b/include/linux/rculist.h
+index b1fd8bf85fdc..3a2bb7d8ed4d 100644
+--- a/include/linux/rculist.h
++++ b/include/linux/rculist.h
+@@ -274,7 +274,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
+  * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
+  */
+ #define list_entry_rcu(ptr, type, member) \
+-      container_of(lockless_dereference(ptr), type, member)
++      container_of(READ_ONCE(ptr), type, member)
+ 
+ /**
+  * Where are list_empty_rcu() and list_first_entry_rcu()?
+@@ -367,7 +367,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
+  * example is when items are added to the list, but never deleted.
+  */
+ #define list_entry_lockless(ptr, type, member) \
+-      container_of((typeof(ptr))lockless_dereference(ptr), type, member)
++      container_of((typeof(ptr))READ_ONCE(ptr), type, member)
+ 
+ /**
+  * list_for_each_entry_lockless - iterate over rcu list of given type
+diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
+index f816fc72b51e..ae494eb7b401 100644
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -341,7 +341,7 @@ static inline void rcu_preempt_sleep_check(void) { }
+ #define __rcu_dereference_check(p, c, space) \
+ ({ \
+       /* Dependency order vs. p above. */ \
+-      typeof(*p) *________p1 = (typeof(*p) *__force)lockless_dereference(p); \
++      typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \
+       RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \
+       rcu_dereference_sparse(p, space); \
+       ((typeof(*p) __force __kernel *)(________p1)); \
+@@ -355,7 +355,7 @@ static inline void rcu_preempt_sleep_check(void) { }
+ #define rcu_dereference_raw(p) \
+ ({ \
+       /* Dependency order vs. p above. */ \
+-      typeof(p) ________p1 = lockless_dereference(p); \
++      typeof(p) ________p1 = READ_ONCE(p); \
+       ((typeof(*p) __force __kernel *)(________p1)); \
+ })
+ 
+diff --git a/mm/slab.h b/mm/slab.h
+index 6885e1192ec5..494cccef822a 100644
+--- a/mm/slab.h
++++ b/mm/slab.h
+@@ -257,7 +257,7 @@ cache_from_memcg_idx(struct kmem_cache *s, int idx)
+        * memcg_caches issues a write barrier to match this (see
+        * memcg_create_kmem_cache()).
+        */
+-      cachep = lockless_dereference(arr->entries[idx]);
++      cachep = READ_ONCE(arr->entries[idx]);
+       rcu_read_unlock();
+ 
+       return cachep;
+diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
+index 939050169d12..18685de61288 100644
+--- a/arch/x86/events/core.c
++++ b/arch/x86/events/core.c
+@@ -2336,7 +2336,7 @@ static unsigned long get_segment_base(unsigned int segment)
+               struct ldt_struct *ldt;
+ 
+               /* IRQs are off, so this synchronizes with smp_store_release */
+-              ldt = lockless_dereference(current->active_mm->context.ldt);
++              ldt = READ_ONCE(current->active_mm->context.ldt);
+               if (!ldt || idx >= ldt->nr_entries)
+                       return 0;
+ 
+diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
+index 0402d44deb4d..b8be2413cb74 100644
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -102,7 +102,7 @@ static void finalize_ldt_struct(struct ldt_struct *ldt)
+ static void install_ldt(struct mm_struct *current_mm,
+                       struct ldt_struct *ldt)
+ {
+-      /* Synchronizes with lockless_dereference in load_mm_ldt. */
++      /* Synchronizes with READ_ONCE in load_mm_ldt. */
+       smp_store_release(&current_mm->context.ldt, ldt);
+ 
+       /* Activate the LDT for all CPUs using current_mm. */
+diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
+index d24e4b05f5da..731b7ffc7e37 100644
+--- a/drivers/md/dm-mpath.c
++++ b/drivers/md/dm-mpath.c
+@@ -366,7 +366,7 @@ static struct pgpath *choose_path_in_pg(struct multipath *m,
+ 
+       pgpath = path_to_pgpath(path);
+ 
+-      if (unlikely(lockless_dereference(m->current_pg) != pg)) {
++      if (unlikely(READ_ONCE(m->current_pg) != pg)) {
+               /* Only update current_pgpath if pg changed */
+               spin_lock_irqsave(&m->lock, flags);
+               m->current_pgpath = pgpath;
+@@ -390,7 +390,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
+       }
+ 
+       /* Were we instructed to switch PG? */
+-      if (lockless_dereference(m->next_pg)) {
++      if (READ_ONCE(m->next_pg)) {
+               spin_lock_irqsave(&m->lock, flags);
+               pg = m->next_pg;
+               if (!pg) {
+@@ -406,7 +406,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
+ 
+       /* Don't change PG until it has no remaining paths */
+ check_current_pg:
+-      pg = lockless_dereference(m->current_pg);
++      pg = READ_ONCE(m->current_pg);
+       if (pg) {
+               pgpath = choose_path_in_pg(m, pg, nr_bytes);
+               if (!IS_ERR_OR_NULL(pgpath))
+@@ -473,7 +473,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
+       struct request *clone;
+ 
+       /* Do we need to select a new pgpath? */
+-      pgpath = lockless_dereference(m->current_pgpath);
++      pgpath = READ_ONCE(m->current_pgpath);
+       if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
+               pgpath = choose_pgpath(m, nr_bytes);
+ 
+@@ -535,7 +535,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
+       bool queue_io;
+ 
+       /* Do we need to select a new pgpath? */
+-      pgpath = lockless_dereference(m->current_pgpath);
++      pgpath = READ_ONCE(m->current_pgpath);
+       queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
+       if (!pgpath || !queue_io)
+               pgpath = choose_pgpath(m, nr_bytes);
+@@ -1799,7 +1799,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
+       struct pgpath *current_pgpath;
+       int r;
+ 
+-      current_pgpath = lockless_dereference(m->current_pgpath);
++      current_pgpath = READ_ONCE(m->current_pgpath);
+       if (!current_pgpath)
+               current_pgpath = choose_pgpath(m, 0);
+ 
+@@ -1821,7 +1821,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
+       }
+ 
+       if (r == -ENOTCONN) {
+-              if (!lockless_dereference(m->current_pg)) {
++              if (!READ_ONCE(m->current_pg)) {
+                       /* Path status changed, redo selection */
+                       (void) choose_pgpath(m, 0);
+               }
+@@ -1890,9 +1890,9 @@ static int multipath_busy(struct dm_target *ti)
+               return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
+ 
+       /* Guess which priority_group will be used at next mapping time */
+-      pg = lockless_dereference(m->current_pg);
+-      next_pg = lockless_dereference(m->next_pg);
+-      if (unlikely(!lockless_dereference(m->current_pgpath) && next_pg))
++      pg = READ_ONCE(m->current_pg);
++      next_pg = READ_ONCE(m->next_pg);
++      if (unlikely(!READ_ONCE(m->current_pgpath) && next_pg))
+               pg = next_pg;
+ 
+       if (!pg) {
+diff --git a/fs/dcache.c b/fs/dcache.c
+index 3203470c59c2..ccc2bcdcfdfb 100644
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -231,7 +231,7 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
+ {
+       /*
+        * Be careful about RCU walk racing with rename:
+-       * use 'lockless_dereference' to fetch the name pointer.
++       * use 'READ_ONCE' to fetch the name pointer.
+        *
+        * NOTE! Even if a rename will mean that the length
+        * was not loaded atomically, we don't care. The
+@@ -245,7 +245,7 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
+        * early because the data cannot match (there can
+        * be no NUL in the ct/tcount data)
+        */
+-      const unsigned char *cs = lockless_dereference(dentry->d_name.name);
++      const unsigned char *cs = READ_ONCE(dentry->d_name.name);
+ 
+       return dentry_string_cmp(cs, ct, tcount);
+ }
+diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
+index 3ff960372cb9..7920a3f62c19 100644
+--- a/fs/overlayfs/readdir.c
++++ b/fs/overlayfs/readdir.c
+@@ -440,7 +440,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
+       if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
+               struct inode *inode = file_inode(file);
+ 
+-              realfile = lockless_dereference(od->upperfile);
++              realfile = READ_ONCE(od->upperfile);
+               if (!realfile) {
+                       struct path upperpath;
+ 
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index 5d4398d1fa19..9f51738bf32e 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -4221,7 +4221,7 @@ static void perf_remove_from_owner(struct perf_event *event)
+        * indeed free this event, otherwise we need to serialize on
+        * owner->perf_event_mutex.
+        */
+-      owner = lockless_dereference(event->owner);
++      owner = READ_ONCE(event->owner);
+       if (owner) {
+               /*
+                * Since delayed_put_task_struct() also drops the last
+@@ -4318,7 +4318,7 @@ int perf_event_release_kernel(struct perf_event *event)
+                * Cannot change, child events are not migrated, see the
+                * comment with perf_event_ctx_lock_nested().
+                */
+-              ctx = lockless_dereference(child->ctx);
++              ctx = READ_ONCE(child->ctx);
+               /*
+                * Since child_mutex nests inside ctx::mutex, we must jump
+                * through hoops. We start by grabbing a reference on the ctx.
+diff --git a/kernel/seccomp.c b/kernel/seccomp.c
+index 34aced9ff3ff..3fd2c4b23697 100644
+--- a/kernel/seccomp.c
++++ b/kernel/seccomp.c
+@@ -188,7 +188,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
+       u32 ret = SECCOMP_RET_ALLOW;
+       /* Make sure cross-thread synced filter points somewhere sane. */
+       struct seccomp_filter *f =
+-                      lockless_dereference(current->seccomp.filter);
++                      READ_ONCE(current->seccomp.filter);
+ 
+       /* Ensure unexpected behavior doesn't result in failing open. */
+       if (unlikely(WARN_ON(f == NULL)))
+diff --git a/kernel/task_work.c b/kernel/task_work.c
+index e056d5429783..0371093a2331 100644
+--- a/kernel/task_work.c
++++ b/kernel/task_work.c
+@@ -67,7 +67,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func)
+        * we raced with task_work_run(), *pprev == NULL/exited.
+        */
+       raw_spin_lock_irqsave(&task->pi_lock, flags);
+-      while ((work = lockless_dereference(*pprev))) {
++      while ((work = READ_ONCE(*pprev))) {
+               if (work->func != func)
+                       pprev = &work->next;
+               else if (cmpxchg(pprev, work, work->next) == work)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0129-x86-mm-kasan-Don-t-use-vmemmap_populate-to-initializ.patch b/patches/kernel/0129-x86-mm-kasan-Don-t-use-vmemmap_populate-to-initializ.patch

deleted file mode 100644 (file)

index 74c149b..0000000
--- a/patches/kernel/0129-x86-mm-kasan-Don-t-use-vmemmap_populate-to-initializ.patch
+++ /dev/null
@@ -1,266 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andrey Ryabinin <aryabinin@virtuozzo.com>
-Date: Wed, 15 Nov 2017 17:36:35 -0800
-Subject: [PATCH] x86/mm/kasan: Don't use vmemmap_populate() to initialize
- shadow
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-[ Note, this is a Git cherry-pick of the following commit:
-
-    d17a1d97dc20: ("x86/mm/kasan: don't use vmemmap_populate() to initialize shadow")
-
-  ... for easier x86 PTI code testing and back-porting. ]
-
-The KASAN shadow is currently mapped using vmemmap_populate() since that
-provides a semi-convenient way to map pages into init_top_pgt.  However,
-since that no longer zeroes the mapped pages, it is not suitable for
-KASAN, which requires zeroed shadow memory.
-
-Add kasan_populate_shadow() interface and use it instead of
-vmemmap_populate().  Besides, this allows us to take advantage of
-gigantic pages and use them to populate the shadow, which should save us
-some memory wasted on page tables and reduce TLB pressure.
-
-Link: http://lkml.kernel.org/r/20171103185147.2688-2-pasha.tatashin@oracle.com
-Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
-Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Steven Sistare <steven.sistare@oracle.com>
-Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
-Cc: Bob Picco <bob.picco@oracle.com>
-Cc: Michal Hocko <mhocko@suse.com>
-Cc: Alexander Potapenko <glider@google.com>
-Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
-Cc: Catalin Marinas <catalin.marinas@arm.com>
-Cc: Christian Borntraeger <borntraeger@de.ibm.com>
-Cc: David S. Miller <davem@davemloft.net>
-Cc: Dmitry Vyukov <dvyukov@google.com>
-Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
-Cc: "H. Peter Anvin" <hpa@zytor.com>
-Cc: Ingo Molnar <mingo@redhat.com>
-Cc: Mark Rutland <mark.rutland@arm.com>
-Cc: Matthew Wilcox <willy@infradead.org>
-Cc: Mel Gorman <mgorman@techsingularity.net>
-Cc: Michal Hocko <mhocko@kernel.org>
-Cc: Sam Ravnborg <sam@ravnborg.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: Will Deacon <will.deacon@arm.com>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 2aeb07365bcd489620f71390a7d2031cd4dfb83e)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit f60ab0015a57d9fbf659b212d504682f069b0590)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/kasan_init_64.c | 143 +++++++++++++++++++++++++++++++++++++++++---
- arch/x86/Kconfig            |   2 +-
- 2 files changed, 137 insertions(+), 8 deletions(-)
-
-diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
-index 464089f33e80..3d7341986e13 100644
---- a/arch/x86/mm/kasan_init_64.c
-+++ b/arch/x86/mm/kasan_init_64.c
-@@ -3,12 +3,14 @@
- #include <linux/bootmem.h>
- #include <linux/kasan.h>
- #include <linux/kdebug.h>
-+#include <linux/memblock.h>
- #include <linux/mm.h>
- #include <linux/sched.h>
- #include <linux/sched/task.h>
- #include <linux/vmalloc.h>
- 
- #include <asm/e820/types.h>
-+#include <asm/pgalloc.h>
- #include <asm/tlbflush.h>
- #include <asm/sections.h>
- 
-@@ -17,7 +19,134 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES];
- 
- static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
- 
--static int __init map_range(struct range *range)
-+static __init void *early_alloc(size_t size, int nid)
-+{
-+      return memblock_virt_alloc_try_nid_nopanic(size, size,
-+              __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
-+}
-+
-+static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
-+                                    unsigned long end, int nid)
-+{
-+      pte_t *pte;
-+
-+      if (pmd_none(*pmd)) {
-+              void *p;
-+
-+              if (boot_cpu_has(X86_FEATURE_PSE) &&
-+                  ((end - addr) == PMD_SIZE) &&
-+                  IS_ALIGNED(addr, PMD_SIZE)) {
-+                      p = early_alloc(PMD_SIZE, nid);
-+                      if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
-+                              return;
-+                      else if (p)
-+                              memblock_free(__pa(p), PMD_SIZE);
-+              }
-+
-+              p = early_alloc(PAGE_SIZE, nid);
-+              pmd_populate_kernel(&init_mm, pmd, p);
-+      }
-+
-+      pte = pte_offset_kernel(pmd, addr);
-+      do {
-+              pte_t entry;
-+              void *p;
-+
-+              if (!pte_none(*pte))
-+                      continue;
-+
-+              p = early_alloc(PAGE_SIZE, nid);
-+              entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
-+              set_pte_at(&init_mm, addr, pte, entry);
-+      } while (pte++, addr += PAGE_SIZE, addr != end);
-+}
-+
-+static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
-+                                    unsigned long end, int nid)
-+{
-+      pmd_t *pmd;
-+      unsigned long next;
-+
-+      if (pud_none(*pud)) {
-+              void *p;
-+
-+              if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
-+                  ((end - addr) == PUD_SIZE) &&
-+                  IS_ALIGNED(addr, PUD_SIZE)) {
-+                      p = early_alloc(PUD_SIZE, nid);
-+                      if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
-+                              return;
-+                      else if (p)
-+                              memblock_free(__pa(p), PUD_SIZE);
-+              }
-+
-+              p = early_alloc(PAGE_SIZE, nid);
-+              pud_populate(&init_mm, pud, p);
-+      }
-+
-+      pmd = pmd_offset(pud, addr);
-+      do {
-+              next = pmd_addr_end(addr, end);
-+              if (!pmd_large(*pmd))
-+                      kasan_populate_pmd(pmd, addr, next, nid);
-+      } while (pmd++, addr = next, addr != end);
-+}
-+
-+static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
-+                                    unsigned long end, int nid)
-+{
-+      pud_t *pud;
-+      unsigned long next;
-+
-+      if (p4d_none(*p4d)) {
-+              void *p = early_alloc(PAGE_SIZE, nid);
-+
-+              p4d_populate(&init_mm, p4d, p);
-+      }
-+
-+      pud = pud_offset(p4d, addr);
-+      do {
-+              next = pud_addr_end(addr, end);
-+              if (!pud_large(*pud))
-+                      kasan_populate_pud(pud, addr, next, nid);
-+      } while (pud++, addr = next, addr != end);
-+}
-+
-+static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
-+                                    unsigned long end, int nid)
-+{
-+      void *p;
-+      p4d_t *p4d;
-+      unsigned long next;
-+
-+      if (pgd_none(*pgd)) {
-+              p = early_alloc(PAGE_SIZE, nid);
-+              pgd_populate(&init_mm, pgd, p);
-+      }
-+
-+      p4d = p4d_offset(pgd, addr);
-+      do {
-+              next = p4d_addr_end(addr, end);
-+              kasan_populate_p4d(p4d, addr, next, nid);
-+      } while (p4d++, addr = next, addr != end);
-+}
-+
-+static void __init kasan_populate_shadow(unsigned long addr, unsigned long end,
-+                                       int nid)
-+{
-+      pgd_t *pgd;
-+      unsigned long next;
-+
-+      addr = addr & PAGE_MASK;
-+      end = round_up(end, PAGE_SIZE);
-+      pgd = pgd_offset_k(addr);
-+      do {
-+              next = pgd_addr_end(addr, end);
-+              kasan_populate_pgd(pgd, addr, next, nid);
-+      } while (pgd++, addr = next, addr != end);
-+}
-+
-+static void __init map_range(struct range *range)
- {
-       unsigned long start;
-       unsigned long end;
-@@ -25,7 +154,7 @@ static int __init map_range(struct range *range)
-       start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
-       end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
- 
--      return vmemmap_populate(start, end, NUMA_NO_NODE);
-+      kasan_populate_shadow(start, end, early_pfn_to_nid(range->start));
- }
- 
- static void __init clear_pgds(unsigned long start,
-@@ -188,16 +317,16 @@ void __init kasan_init(void)
-               if (pfn_mapped[i].end == 0)
-                       break;
- 
--              if (map_range(&pfn_mapped[i]))
--                      panic("kasan: unable to allocate shadow!");
-+              map_range(&pfn_mapped[i]);
-       }
-+
-       kasan_populate_zero_shadow(
-               kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
-               kasan_mem_to_shadow((void *)__START_KERNEL_map));
- 
--      vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext),
--                      (unsigned long)kasan_mem_to_shadow(_end),
--                      NUMA_NO_NODE);
-+      kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
-+                            (unsigned long)kasan_mem_to_shadow(_end),
-+                            early_pfn_to_nid(__pa(_stext)));
- 
-       kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
-                       (void *)KASAN_SHADOW_END);
-diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
-index 67d07802ae95..8b5499bb24bb 100644
---- a/arch/x86/Kconfig
-+++ b/arch/x86/Kconfig
-@@ -106,7 +106,7 @@ config X86
-       select HAVE_ARCH_AUDITSYSCALL
-       select HAVE_ARCH_HUGE_VMAP              if X86_64 || X86_PAE
-       select HAVE_ARCH_JUMP_LABEL
--      select HAVE_ARCH_KASAN                  if X86_64 && SPARSEMEM_VMEMMAP
-+      select HAVE_ARCH_KASAN                  if X86_64
-       select HAVE_ARCH_KGDB
-       select HAVE_ARCH_KMEMCHECK
-       select HAVE_ARCH_MMAP_RND_BITS          if MMU
--- 
-2.14.2
-
diff --git a/patches/kernel/0130-mm-sparsemem-Fix-ARM64-boot-crash-when-CONFIG_SPARSE.patch b/patches/kernel/0130-mm-sparsemem-Fix-ARM64-boot-crash-when-CONFIG_SPARSE.patch

deleted file mode 100644 (file)

index eea6854..0000000
--- a/patches/kernel/0130-mm-sparsemem-Fix-ARM64-boot-crash-when-CONFIG_SPARSE.patch
+++ /dev/null
@@ -1,88 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
-Date: Tue, 7 Nov 2017 11:33:37 +0300
-Subject: [PATCH] mm/sparsemem: Fix ARM64 boot crash when
- CONFIG_SPARSEMEM_EXTREME=y
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Since commit:
-
-  83e3c48729d9 ("mm/sparsemem: Allocate mem_section at runtime for CONFIG_SPARSEMEM_EXTREME=y")
-
-we allocate the mem_section array dynamically in sparse_memory_present_with_active_regions(),
-but some architectures, like arm64, don't call the routine to initialize sparsemem.
-
-Let's move the initialization into memory_present() it should cover all
-architectures.
-
-Reported-and-tested-by: Sudeep Holla <sudeep.holla@arm.com>
-Tested-by: Bjorn Andersson <bjorn.andersson@linaro.org>
-Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
-Acked-by: Will Deacon <will.deacon@arm.com>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-mm@kvack.org
-Fixes: 83e3c48729d9 ("mm/sparsemem: Allocate mem_section at runtime for CONFIG_SPARSEMEM_EXTREME=y")
-Link: http://lkml.kernel.org/r/20171107083337.89952-1-kirill.shutemov@linux.intel.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 629a359bdb0e0652a8227b4ff3125431995fec6e)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit fbc3acbf37de68310eb5bbc7f4d1977e7b90100e)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- mm/page_alloc.c | 10 ----------
- mm/sparse.c     | 10 ++++++++++
- 2 files changed, 10 insertions(+), 10 deletions(-)
-
-diff --git a/mm/page_alloc.c b/mm/page_alloc.c
-index 66eb23ab658d..1423da8dd16f 100644
---- a/mm/page_alloc.c
-+++ b/mm/page_alloc.c
-@@ -5707,16 +5707,6 @@ void __init sparse_memory_present_with_active_regions(int nid)
-       unsigned long start_pfn, end_pfn;
-       int i, this_nid;
- 
--#ifdef CONFIG_SPARSEMEM_EXTREME
--      if (!mem_section) {
--              unsigned long size, align;
--
--              size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
--              align = 1 << (INTERNODE_CACHE_SHIFT);
--              mem_section = memblock_virt_alloc(size, align);
--      }
--#endif
--
-       for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
-               memory_present(this_nid, start_pfn, end_pfn);
- }
-diff --git a/mm/sparse.c b/mm/sparse.c
-index 308a0789d1bb..9c48e4fe8ce0 100644
---- a/mm/sparse.c
-+++ b/mm/sparse.c
-@@ -210,6 +210,16 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
- {
-       unsigned long pfn;
- 
-+#ifdef CONFIG_SPARSEMEM_EXTREME
-+      if (unlikely(!mem_section)) {
-+              unsigned long size, align;
-+
-+              size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
-+              align = 1 << (INTERNODE_CACHE_SHIFT);
-+              mem_section = memblock_virt_alloc(size, align);
-+      }
-+#endif
-+
-       start &= PAGE_SECTION_MASK;
-       mminit_validate_memmodel_limits(&start, &end);
-       for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
--- 
-2.14.2
-
diff --git a/patches/kernel/0130-x86-mm-kasan-Don-t-use-vmemmap_populate-to-initializ.patch b/patches/kernel/0130-x86-mm-kasan-Don-t-use-vmemmap_populate-to-initializ.patch

new file mode 100644 (file)

index 0000000..74c149b
--- /dev/null
+++ b/patches/kernel/0130-x86-mm-kasan-Don-t-use-vmemmap_populate-to-initializ.patch
@@ -0,0 +1,266 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Date: Wed, 15 Nov 2017 17:36:35 -0800
+Subject: [PATCH] x86/mm/kasan: Don't use vmemmap_populate() to initialize
+ shadow
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+[ Note, this is a Git cherry-pick of the following commit:
+
+    d17a1d97dc20: ("x86/mm/kasan: don't use vmemmap_populate() to initialize shadow")
+
+  ... for easier x86 PTI code testing and back-porting. ]
+
+The KASAN shadow is currently mapped using vmemmap_populate() since that
+provides a semi-convenient way to map pages into init_top_pgt.  However,
+since that no longer zeroes the mapped pages, it is not suitable for
+KASAN, which requires zeroed shadow memory.
+
+Add kasan_populate_shadow() interface and use it instead of
+vmemmap_populate().  Besides, this allows us to take advantage of
+gigantic pages and use them to populate the shadow, which should save us
+some memory wasted on page tables and reduce TLB pressure.
+
+Link: http://lkml.kernel.org/r/20171103185147.2688-2-pasha.tatashin@oracle.com
+Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Steven Sistare <steven.sistare@oracle.com>
+Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: Bob Picco <bob.picco@oracle.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Christian Borntraeger <borntraeger@de.ibm.com>
+Cc: David S. Miller <davem@davemloft.net>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Sam Ravnborg <sam@ravnborg.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 2aeb07365bcd489620f71390a7d2031cd4dfb83e)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit f60ab0015a57d9fbf659b212d504682f069b0590)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/kasan_init_64.c | 143 +++++++++++++++++++++++++++++++++++++++++---
+ arch/x86/Kconfig            |   2 +-
+ 2 files changed, 137 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
+index 464089f33e80..3d7341986e13 100644
+--- a/arch/x86/mm/kasan_init_64.c
++++ b/arch/x86/mm/kasan_init_64.c
+@@ -3,12 +3,14 @@
+ #include <linux/bootmem.h>
+ #include <linux/kasan.h>
+ #include <linux/kdebug.h>
++#include <linux/memblock.h>
+ #include <linux/mm.h>
+ #include <linux/sched.h>
+ #include <linux/sched/task.h>
+ #include <linux/vmalloc.h>
+ 
+ #include <asm/e820/types.h>
++#include <asm/pgalloc.h>
+ #include <asm/tlbflush.h>
+ #include <asm/sections.h>
+ 
+@@ -17,7 +19,134 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES];
+ 
+ static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+ 
+-static int __init map_range(struct range *range)
++static __init void *early_alloc(size_t size, int nid)
++{
++      return memblock_virt_alloc_try_nid_nopanic(size, size,
++              __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
++}
++
++static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
++                                    unsigned long end, int nid)
++{
++      pte_t *pte;
++
++      if (pmd_none(*pmd)) {
++              void *p;
++
++              if (boot_cpu_has(X86_FEATURE_PSE) &&
++                  ((end - addr) == PMD_SIZE) &&
++                  IS_ALIGNED(addr, PMD_SIZE)) {
++                      p = early_alloc(PMD_SIZE, nid);
++                      if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
++                              return;
++                      else if (p)
++                              memblock_free(__pa(p), PMD_SIZE);
++              }
++
++              p = early_alloc(PAGE_SIZE, nid);
++              pmd_populate_kernel(&init_mm, pmd, p);
++      }
++
++      pte = pte_offset_kernel(pmd, addr);
++      do {
++              pte_t entry;
++              void *p;
++
++              if (!pte_none(*pte))
++                      continue;
++
++              p = early_alloc(PAGE_SIZE, nid);
++              entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
++              set_pte_at(&init_mm, addr, pte, entry);
++      } while (pte++, addr += PAGE_SIZE, addr != end);
++}
++
++static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
++                                    unsigned long end, int nid)
++{
++      pmd_t *pmd;
++      unsigned long next;
++
++      if (pud_none(*pud)) {
++              void *p;
++
++              if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
++                  ((end - addr) == PUD_SIZE) &&
++                  IS_ALIGNED(addr, PUD_SIZE)) {
++                      p = early_alloc(PUD_SIZE, nid);
++                      if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
++                              return;
++                      else if (p)
++                              memblock_free(__pa(p), PUD_SIZE);
++              }
++
++              p = early_alloc(PAGE_SIZE, nid);
++              pud_populate(&init_mm, pud, p);
++      }
++
++      pmd = pmd_offset(pud, addr);
++      do {
++              next = pmd_addr_end(addr, end);
++              if (!pmd_large(*pmd))
++                      kasan_populate_pmd(pmd, addr, next, nid);
++      } while (pmd++, addr = next, addr != end);
++}
++
++static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
++                                    unsigned long end, int nid)
++{
++      pud_t *pud;
++      unsigned long next;
++
++      if (p4d_none(*p4d)) {
++              void *p = early_alloc(PAGE_SIZE, nid);
++
++              p4d_populate(&init_mm, p4d, p);
++      }
++
++      pud = pud_offset(p4d, addr);
++      do {
++              next = pud_addr_end(addr, end);
++              if (!pud_large(*pud))
++                      kasan_populate_pud(pud, addr, next, nid);
++      } while (pud++, addr = next, addr != end);
++}
++
++static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
++                                    unsigned long end, int nid)
++{
++      void *p;
++      p4d_t *p4d;
++      unsigned long next;
++
++      if (pgd_none(*pgd)) {
++              p = early_alloc(PAGE_SIZE, nid);
++              pgd_populate(&init_mm, pgd, p);
++      }
++
++      p4d = p4d_offset(pgd, addr);
++      do {
++              next = p4d_addr_end(addr, end);
++              kasan_populate_p4d(p4d, addr, next, nid);
++      } while (p4d++, addr = next, addr != end);
++}
++
++static void __init kasan_populate_shadow(unsigned long addr, unsigned long end,
++                                       int nid)
++{
++      pgd_t *pgd;
++      unsigned long next;
++
++      addr = addr & PAGE_MASK;
++      end = round_up(end, PAGE_SIZE);
++      pgd = pgd_offset_k(addr);
++      do {
++              next = pgd_addr_end(addr, end);
++              kasan_populate_pgd(pgd, addr, next, nid);
++      } while (pgd++, addr = next, addr != end);
++}
++
++static void __init map_range(struct range *range)
+ {
+       unsigned long start;
+       unsigned long end;
+@@ -25,7 +154,7 @@ static int __init map_range(struct range *range)
+       start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
+       end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
+ 
+-      return vmemmap_populate(start, end, NUMA_NO_NODE);
++      kasan_populate_shadow(start, end, early_pfn_to_nid(range->start));
+ }
+ 
+ static void __init clear_pgds(unsigned long start,
+@@ -188,16 +317,16 @@ void __init kasan_init(void)
+               if (pfn_mapped[i].end == 0)
+                       break;
+ 
+-              if (map_range(&pfn_mapped[i]))
+-                      panic("kasan: unable to allocate shadow!");
++              map_range(&pfn_mapped[i]);
+       }
++
+       kasan_populate_zero_shadow(
+               kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
+               kasan_mem_to_shadow((void *)__START_KERNEL_map));
+ 
+-      vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext),
+-                      (unsigned long)kasan_mem_to_shadow(_end),
+-                      NUMA_NO_NODE);
++      kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
++                            (unsigned long)kasan_mem_to_shadow(_end),
++                            early_pfn_to_nid(__pa(_stext)));
+ 
+       kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
+                       (void *)KASAN_SHADOW_END);
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 67d07802ae95..8b5499bb24bb 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -106,7 +106,7 @@ config X86
+       select HAVE_ARCH_AUDITSYSCALL
+       select HAVE_ARCH_HUGE_VMAP              if X86_64 || X86_PAE
+       select HAVE_ARCH_JUMP_LABEL
+-      select HAVE_ARCH_KASAN                  if X86_64 && SPARSEMEM_VMEMMAP
++      select HAVE_ARCH_KASAN                  if X86_64
+       select HAVE_ARCH_KGDB
+       select HAVE_ARCH_KMEMCHECK
+       select HAVE_ARCH_MMAP_RND_BITS          if MMU
+-- 
+2.14.2
+
diff --git a/patches/kernel/0131-mm-sparsemem-Fix-ARM64-boot-crash-when-CONFIG_SPARSE.patch b/patches/kernel/0131-mm-sparsemem-Fix-ARM64-boot-crash-when-CONFIG_SPARSE.patch

new file mode 100644 (file)

index 0000000..eea6854
--- /dev/null
+++ b/patches/kernel/0131-mm-sparsemem-Fix-ARM64-boot-crash-when-CONFIG_SPARSE.patch
@@ -0,0 +1,88 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Tue, 7 Nov 2017 11:33:37 +0300
+Subject: [PATCH] mm/sparsemem: Fix ARM64 boot crash when
+ CONFIG_SPARSEMEM_EXTREME=y
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Since commit:
+
+  83e3c48729d9 ("mm/sparsemem: Allocate mem_section at runtime for CONFIG_SPARSEMEM_EXTREME=y")
+
+we allocate the mem_section array dynamically in sparse_memory_present_with_active_regions(),
+but some architectures, like arm64, don't call the routine to initialize sparsemem.
+
+Let's move the initialization into memory_present(); it should cover all
+architectures.
+
+Reported-and-tested-by: Sudeep Holla <sudeep.holla@arm.com>
+Tested-by: Bjorn Andersson <bjorn.andersson@linaro.org>
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Acked-by: Will Deacon <will.deacon@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Fixes: 83e3c48729d9 ("mm/sparsemem: Allocate mem_section at runtime for CONFIG_SPARSEMEM_EXTREME=y")
+Link: http://lkml.kernel.org/r/20171107083337.89952-1-kirill.shutemov@linux.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 629a359bdb0e0652a8227b4ff3125431995fec6e)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit fbc3acbf37de68310eb5bbc7f4d1977e7b90100e)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ mm/page_alloc.c | 10 ----------
+ mm/sparse.c     | 10 ++++++++++
+ 2 files changed, 10 insertions(+), 10 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 66eb23ab658d..1423da8dd16f 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -5707,16 +5707,6 @@ void __init sparse_memory_present_with_active_regions(int nid)
+       unsigned long start_pfn, end_pfn;
+       int i, this_nid;
+ 
+-#ifdef CONFIG_SPARSEMEM_EXTREME
+-      if (!mem_section) {
+-              unsigned long size, align;
+-
+-              size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
+-              align = 1 << (INTERNODE_CACHE_SHIFT);
+-              mem_section = memblock_virt_alloc(size, align);
+-      }
+-#endif
+-
+       for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
+               memory_present(this_nid, start_pfn, end_pfn);
+ }
+diff --git a/mm/sparse.c b/mm/sparse.c
+index 308a0789d1bb..9c48e4fe8ce0 100644
+--- a/mm/sparse.c
++++ b/mm/sparse.c
+@@ -210,6 +210,16 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
+ {
+       unsigned long pfn;
+ 
++#ifdef CONFIG_SPARSEMEM_EXTREME
++      if (unlikely(!mem_section)) {
++              unsigned long size, align;
++
++              size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
++              align = 1 << (INTERNODE_CACHE_SHIFT);
++              mem_section = memblock_virt_alloc(size, align);
++      }
++#endif
++
+       start &= PAGE_SECTION_MASK;
+       mminit_validate_memmodel_limits(&start, &end);
+       for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
+-- 
+2.14.2
+
diff --git a/patches/kernel/0131-objtool-Move-synced-files-to-their-original-relative.patch b/patches/kernel/0131-objtool-Move-synced-files-to-their-original-relative.patch

deleted file mode 100644 (file)

index 5c1bc25..0000000
--- a/patches/kernel/0131-objtool-Move-synced-files-to-their-original-relative.patch
+++ /dev/null
@@ -1,245 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Mon, 6 Nov 2017 07:21:50 -0600
-Subject: [PATCH] objtool: Move synced files to their original relative
- locations
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This will enable more straightforward comparisons, and it also makes the
-files 100% identical.
-
-Suggested-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/407b2aaa317741f48fcf821592c0e96ab3be1890.1509974346.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit b90671a530137f42325b89c0848ca58d865c1710)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 3673cdbc4be1671fad71a4968a9f55357d9d356c)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- .../arch/x86/{insn => lib}/x86-opcode-map.txt      |  0
- tools/objtool/Makefile                             | 22 ++++++++++++----------
- .../objtool/arch/x86/{insn => include/asm}/inat.h  |  2 +-
- .../arch/x86/{insn => include/asm}/inat_types.h    |  0
- .../objtool/arch/x86/{insn => include/asm}/insn.h  |  2 +-
- .../objtool/{ => arch/x86/include/asm}/orc_types.h |  0
- tools/objtool/orc.h                                |  2 +-
- tools/objtool/arch/x86/decode.c                    |  6 +++---
- tools/objtool/arch/x86/{insn => lib}/inat.c        |  2 +-
- tools/objtool/arch/x86/{insn => lib}/insn.c        |  4 ++--
- tools/objtool/.gitignore                           |  2 +-
- tools/objtool/arch/x86/Build                       | 10 +++++-----
- .../arch/x86/{insn => tools}/gen-insn-attr-x86.awk |  0
- 13 files changed, 27 insertions(+), 25 deletions(-)
- rename tools/objtool/arch/x86/{insn => lib}/x86-opcode-map.txt (100%)
- rename tools/objtool/arch/x86/{insn => include/asm}/inat.h (99%)
- rename tools/objtool/arch/x86/{insn => include/asm}/inat_types.h (100%)
- rename tools/objtool/arch/x86/{insn => include/asm}/insn.h (99%)
- rename tools/objtool/{ => arch/x86/include/asm}/orc_types.h (100%)
- rename tools/objtool/arch/x86/{insn => lib}/inat.c (99%)
- rename tools/objtool/arch/x86/{insn => lib}/insn.c (99%)
- rename tools/objtool/arch/x86/{insn => tools}/gen-insn-attr-x86.awk (100%)
-
-diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
-similarity index 100%
-rename from tools/objtool/arch/x86/insn/x86-opcode-map.txt
-rename to tools/objtool/arch/x86/lib/x86-opcode-map.txt
-diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
-index 3a6425fefc43..f95f48e445c3 100644
---- a/tools/objtool/Makefile
-+++ b/tools/objtool/Makefile
-@@ -24,7 +24,9 @@ OBJTOOL_IN := $(OBJTOOL)-in.o
- 
- all: $(OBJTOOL)
- 
--INCLUDES := -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi
-+INCLUDES := -I$(srctree)/tools/include \
-+          -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
-+          -I$(srctree)/tools/objtool/arch/$(HOSTARCH)/include
- CFLAGS   += -Wall -Werror $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -fomit-frame-pointer -O2 -g $(INCLUDES)
- LDFLAGS  += -lelf $(LIBSUBCMD)
- 
-@@ -44,16 +46,16 @@ $(OBJTOOL_IN): fixdep FORCE
- $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
-       @(diff -I 2>&1 | grep -q 'option requires an argument' && \
-       test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \
--      diff -I'^#include' arch/x86/insn/insn.c ../../arch/x86/lib/insn.c >/dev/null && \
--      diff -I'^#include' arch/x86/insn/inat.c ../../arch/x86/lib/inat.c >/dev/null && \
--      diff arch/x86/insn/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \
--      diff arch/x86/insn/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \
--      diff -I'^#include' arch/x86/insn/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \
--      diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \
--      diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \
-+      diff arch/x86/lib/insn.c ../../arch/x86/lib/insn.c >/dev/null && \
-+      diff arch/x86/lib/inat.c ../../arch/x86/lib/inat.c >/dev/null && \
-+      diff arch/x86/lib/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \
-+      diff arch/x86/tools/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \
-+      diff arch/x86/include/asm/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \
-+      diff arch/x86/include/asm/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \
-+      diff arch/x86/include/asm/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \
-       || echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true
-       @(test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \
--      diff ../../arch/x86/include/asm/orc_types.h orc_types.h >/dev/null) \
-+      diff ../../arch/x86/include/asm/orc_types.h arch/x86/include/asm/orc_types.h >/dev/null) \
-       || echo "warning: objtool: orc_types.h differs from kernel" >&2 )) || true
-       $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
- 
-@@ -64,7 +66,7 @@ $(LIBSUBCMD): fixdep FORCE
- clean:
-       $(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL)
-       $(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
--      $(Q)$(RM) $(OUTPUT)arch/x86/insn/inat-tables.c $(OUTPUT)fixdep
-+      $(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep
- 
- FORCE:
- 
-diff --git a/tools/objtool/arch/x86/insn/inat.h b/tools/objtool/arch/x86/include/asm/inat.h
-similarity index 99%
-rename from tools/objtool/arch/x86/insn/inat.h
-rename to tools/objtool/arch/x86/include/asm/inat.h
-index 125ecd2a300d..02aff0867211 100644
---- a/tools/objtool/arch/x86/insn/inat.h
-+++ b/tools/objtool/arch/x86/include/asm/inat.h
-@@ -20,7 +20,7 @@
-  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-  *
-  */
--#include "inat_types.h"
-+#include <asm/inat_types.h>
- 
- /*
-  * Internal bits. Don't use bitmasks directly, because these bits are
-diff --git a/tools/objtool/arch/x86/insn/inat_types.h b/tools/objtool/arch/x86/include/asm/inat_types.h
-similarity index 100%
-rename from tools/objtool/arch/x86/insn/inat_types.h
-rename to tools/objtool/arch/x86/include/asm/inat_types.h
-diff --git a/tools/objtool/arch/x86/insn/insn.h b/tools/objtool/arch/x86/include/asm/insn.h
-similarity index 99%
-rename from tools/objtool/arch/x86/insn/insn.h
-rename to tools/objtool/arch/x86/include/asm/insn.h
-index e23578c7b1be..b3e32b010ab1 100644
---- a/tools/objtool/arch/x86/insn/insn.h
-+++ b/tools/objtool/arch/x86/include/asm/insn.h
-@@ -21,7 +21,7 @@
-  */
- 
- /* insn_attr_t is defined in inat.h */
--#include "inat.h"
-+#include <asm/inat.h>
- 
- struct insn_field {
-       union {
-diff --git a/tools/objtool/orc_types.h b/tools/objtool/arch/x86/include/asm/orc_types.h
-similarity index 100%
-rename from tools/objtool/orc_types.h
-rename to tools/objtool/arch/x86/include/asm/orc_types.h
-diff --git a/tools/objtool/orc.h b/tools/objtool/orc.h
-index a4139e386ef3..b0e92a6d0903 100644
---- a/tools/objtool/orc.h
-+++ b/tools/objtool/orc.h
-@@ -18,7 +18,7 @@
- #ifndef _ORC_H
- #define _ORC_H
- 
--#include "orc_types.h"
-+#include <asm/orc_types.h>
- 
- struct objtool_file;
- 
-diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
-index 4559a21a8de2..92f57996d66c 100644
---- a/tools/objtool/arch/x86/decode.c
-+++ b/tools/objtool/arch/x86/decode.c
-@@ -19,9 +19,9 @@
- #include <stdlib.h>
- 
- #define unlikely(cond) (cond)
--#include "insn/insn.h"
--#include "insn/inat.c"
--#include "insn/insn.c"
-+#include <asm/insn.h>
-+#include "lib/inat.c"
-+#include "lib/insn.c"
- 
- #include "../../elf.h"
- #include "../../arch.h"
-diff --git a/tools/objtool/arch/x86/insn/inat.c b/tools/objtool/arch/x86/lib/inat.c
-similarity index 99%
-rename from tools/objtool/arch/x86/insn/inat.c
-rename to tools/objtool/arch/x86/lib/inat.c
-index e4bf28e6f4c7..c1f01a8e9f65 100644
---- a/tools/objtool/arch/x86/insn/inat.c
-+++ b/tools/objtool/arch/x86/lib/inat.c
-@@ -18,7 +18,7 @@
-  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-  *
-  */
--#include "insn.h"
-+#include <asm/insn.h>
- 
- /* Attribute tables are generated from opcode map */
- #include "inat-tables.c"
-diff --git a/tools/objtool/arch/x86/insn/insn.c b/tools/objtool/arch/x86/lib/insn.c
-similarity index 99%
-rename from tools/objtool/arch/x86/insn/insn.c
-rename to tools/objtool/arch/x86/lib/insn.c
-index ca983e2bea8b..1088eb8f3a5f 100644
---- a/tools/objtool/arch/x86/insn/insn.c
-+++ b/tools/objtool/arch/x86/lib/insn.c
-@@ -23,8 +23,8 @@
- #else
- #include <string.h>
- #endif
--#include "inat.h"
--#include "insn.h"
-+#include <asm/inat.h>
-+#include <asm/insn.h>
- 
- /* Verify next sizeof(t) bytes can be on the same instruction */
- #define validate_next(t, insn, n)     \
-diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore
-index d3102c865a95..914cff12899b 100644
---- a/tools/objtool/.gitignore
-+++ b/tools/objtool/.gitignore
-@@ -1,3 +1,3 @@
--arch/x86/insn/inat-tables.c
-+arch/x86/lib/inat-tables.c
- objtool
- fixdep
-diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build
-index debbdb0b5c43..b998412c017d 100644
---- a/tools/objtool/arch/x86/Build
-+++ b/tools/objtool/arch/x86/Build
-@@ -1,12 +1,12 @@
- objtool-y += decode.o
- 
--inat_tables_script = arch/x86/insn/gen-insn-attr-x86.awk
--inat_tables_maps = arch/x86/insn/x86-opcode-map.txt
-+inat_tables_script = arch/x86/tools/gen-insn-attr-x86.awk
-+inat_tables_maps = arch/x86/lib/x86-opcode-map.txt
- 
--$(OUTPUT)arch/x86/insn/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
-+$(OUTPUT)arch/x86/lib/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
-       $(call rule_mkdir)
-       $(Q)$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@
- 
--$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/insn/inat-tables.c
-+$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/lib/inat-tables.c
- 
--CFLAGS_decode.o += -I$(OUTPUT)arch/x86/insn
-+CFLAGS_decode.o += -I$(OUTPUT)arch/x86/lib
-diff --git a/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk
-similarity index 100%
-rename from tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk
-rename to tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk
--- 
-2.14.2
-
diff --git a/patches/kernel/0132-objtool-Move-kernel-headers-code-sync-check-to-a-scr.patch b/patches/kernel/0132-objtool-Move-kernel-headers-code-sync-check-to-a-scr.patch

deleted file mode 100644 (file)

index 5ebc193..0000000
--- a/patches/kernel/0132-objtool-Move-kernel-headers-code-sync-check-to-a-scr.patch
+++ /dev/null
@@ -1,99 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Mon, 6 Nov 2017 07:21:51 -0600
-Subject: [PATCH] objtool: Move kernel headers/code sync check to a script
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Replace the nasty diff checks in the objtool Makefile with a clean bash
-script, and make the warnings more specific.
-
-Heavily inspired by tools/perf/check-headers.sh.
-
-Suggested-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/ab015f15ccd8c0c6008493c3c6ee3d495eaf2927.1509974346.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit a89ec413c623eb2870180bcad678046bf7bc8465)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 4e72ce95a057e744b8d580239e2d8afa51118d82)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- tools/objtool/Makefile      | 16 +---------------
- tools/objtool/sync-check.sh | 29 +++++++++++++++++++++++++++++
- 2 files changed, 30 insertions(+), 15 deletions(-)
- create mode 100755 tools/objtool/sync-check.sh
-
-diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
-index f95f48e445c3..90b0241f3ccc 100644
---- a/tools/objtool/Makefile
-+++ b/tools/objtool/Makefile
-@@ -41,22 +41,8 @@ include $(srctree)/tools/build/Makefile.include
- $(OBJTOOL_IN): fixdep FORCE
-       @$(MAKE) $(build)=objtool
- 
--# Busybox's diff doesn't have -I, avoid warning in that case
--#
- $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
--      @(diff -I 2>&1 | grep -q 'option requires an argument' && \
--      test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \
--      diff arch/x86/lib/insn.c ../../arch/x86/lib/insn.c >/dev/null && \
--      diff arch/x86/lib/inat.c ../../arch/x86/lib/inat.c >/dev/null && \
--      diff arch/x86/lib/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \
--      diff arch/x86/tools/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \
--      diff arch/x86/include/asm/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \
--      diff arch/x86/include/asm/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \
--      diff arch/x86/include/asm/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \
--      || echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true
--      @(test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \
--      diff ../../arch/x86/include/asm/orc_types.h arch/x86/include/asm/orc_types.h >/dev/null) \
--      || echo "warning: objtool: orc_types.h differs from kernel" >&2 )) || true
-+      @./sync-check.sh
-       $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
- 
- 
-diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
-new file mode 100755
-index 000000000000..1470e74e9d66
---- /dev/null
-+++ b/tools/objtool/sync-check.sh
-@@ -0,0 +1,29 @@
-+#!/bin/sh
-+# SPDX-License-Identifier: GPL-2.0
-+
-+FILES='
-+arch/x86/lib/insn.c
-+arch/x86/lib/inat.c
-+arch/x86/lib/x86-opcode-map.txt
-+arch/x86/tools/gen-insn-attr-x86.awk
-+arch/x86/include/asm/insn.h
-+arch/x86/include/asm/inat.h
-+arch/x86/include/asm/inat_types.h
-+arch/x86/include/asm/orc_types.h
-+'
-+
-+check()
-+{
-+      local file=$1
-+
-+      diff $file ../../$file > /dev/null ||
-+              echo "Warning: synced file at 'tools/objtool/$file' differs from latest kernel version at '$file'"
-+}
-+
-+if [ ! -d ../../kernel ] || [ ! -d ../../tools ] || [ ! -d ../objtool ]; then
-+      exit 0
-+fi
-+
-+for i in $FILES; do
-+  check $i
-+done
--- 
-2.14.2
-
diff --git a/patches/kernel/0132-objtool-Move-synced-files-to-their-original-relative.patch b/patches/kernel/0132-objtool-Move-synced-files-to-their-original-relative.patch

new file mode 100644 (file)

index 0000000..5c1bc25
--- /dev/null
+++ b/patches/kernel/0132-objtool-Move-synced-files-to-their-original-relative.patch
@@ -0,0 +1,245 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 6 Nov 2017 07:21:50 -0600
+Subject: [PATCH] objtool: Move synced files to their original relative
+ locations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This will enable more straightforward comparisons, and it also makes the
+files 100% identical.
+
+Suggested-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/407b2aaa317741f48fcf821592c0e96ab3be1890.1509974346.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit b90671a530137f42325b89c0848ca58d865c1710)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 3673cdbc4be1671fad71a4968a9f55357d9d356c)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ .../arch/x86/{insn => lib}/x86-opcode-map.txt      |  0
+ tools/objtool/Makefile                             | 22 ++++++++++++----------
+ .../objtool/arch/x86/{insn => include/asm}/inat.h  |  2 +-
+ .../arch/x86/{insn => include/asm}/inat_types.h    |  0
+ .../objtool/arch/x86/{insn => include/asm}/insn.h  |  2 +-
+ .../objtool/{ => arch/x86/include/asm}/orc_types.h |  0
+ tools/objtool/orc.h                                |  2 +-
+ tools/objtool/arch/x86/decode.c                    |  6 +++---
+ tools/objtool/arch/x86/{insn => lib}/inat.c        |  2 +-
+ tools/objtool/arch/x86/{insn => lib}/insn.c        |  4 ++--
+ tools/objtool/.gitignore                           |  2 +-
+ tools/objtool/arch/x86/Build                       | 10 +++++-----
+ .../arch/x86/{insn => tools}/gen-insn-attr-x86.awk |  0
+ 13 files changed, 27 insertions(+), 25 deletions(-)
+ rename tools/objtool/arch/x86/{insn => lib}/x86-opcode-map.txt (100%)
+ rename tools/objtool/arch/x86/{insn => include/asm}/inat.h (99%)
+ rename tools/objtool/arch/x86/{insn => include/asm}/inat_types.h (100%)
+ rename tools/objtool/arch/x86/{insn => include/asm}/insn.h (99%)
+ rename tools/objtool/{ => arch/x86/include/asm}/orc_types.h (100%)
+ rename tools/objtool/arch/x86/{insn => lib}/inat.c (99%)
+ rename tools/objtool/arch/x86/{insn => lib}/insn.c (99%)
+ rename tools/objtool/arch/x86/{insn => tools}/gen-insn-attr-x86.awk (100%)
+
+diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
+similarity index 100%
+rename from tools/objtool/arch/x86/insn/x86-opcode-map.txt
+rename to tools/objtool/arch/x86/lib/x86-opcode-map.txt
+diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
+index 3a6425fefc43..f95f48e445c3 100644
+--- a/tools/objtool/Makefile
++++ b/tools/objtool/Makefile
+@@ -24,7 +24,9 @@ OBJTOOL_IN := $(OBJTOOL)-in.o
+ 
+ all: $(OBJTOOL)
+ 
+-INCLUDES := -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi
++INCLUDES := -I$(srctree)/tools/include \
++          -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
++          -I$(srctree)/tools/objtool/arch/$(HOSTARCH)/include
+ CFLAGS   += -Wall -Werror $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -fomit-frame-pointer -O2 -g $(INCLUDES)
+ LDFLAGS  += -lelf $(LIBSUBCMD)
+ 
+@@ -44,16 +46,16 @@ $(OBJTOOL_IN): fixdep FORCE
+ $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
+       @(diff -I 2>&1 | grep -q 'option requires an argument' && \
+       test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \
+-      diff -I'^#include' arch/x86/insn/insn.c ../../arch/x86/lib/insn.c >/dev/null && \
+-      diff -I'^#include' arch/x86/insn/inat.c ../../arch/x86/lib/inat.c >/dev/null && \
+-      diff arch/x86/insn/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \
+-      diff arch/x86/insn/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \
+-      diff -I'^#include' arch/x86/insn/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \
+-      diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \
+-      diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \
++      diff arch/x86/lib/insn.c ../../arch/x86/lib/insn.c >/dev/null && \
++      diff arch/x86/lib/inat.c ../../arch/x86/lib/inat.c >/dev/null && \
++      diff arch/x86/lib/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \
++      diff arch/x86/tools/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \
++      diff arch/x86/include/asm/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \
++      diff arch/x86/include/asm/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \
++      diff arch/x86/include/asm/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \
+       || echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true
+       @(test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \
+-      diff ../../arch/x86/include/asm/orc_types.h orc_types.h >/dev/null) \
++      diff ../../arch/x86/include/asm/orc_types.h arch/x86/include/asm/orc_types.h >/dev/null) \
+       || echo "warning: objtool: orc_types.h differs from kernel" >&2 )) || true
+       $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
+ 
+@@ -64,7 +66,7 @@ $(LIBSUBCMD): fixdep FORCE
+ clean:
+       $(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL)
+       $(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+-      $(Q)$(RM) $(OUTPUT)arch/x86/insn/inat-tables.c $(OUTPUT)fixdep
++      $(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep
+ 
+ FORCE:
+ 
+diff --git a/tools/objtool/arch/x86/insn/inat.h b/tools/objtool/arch/x86/include/asm/inat.h
+similarity index 99%
+rename from tools/objtool/arch/x86/insn/inat.h
+rename to tools/objtool/arch/x86/include/asm/inat.h
+index 125ecd2a300d..02aff0867211 100644
+--- a/tools/objtool/arch/x86/insn/inat.h
++++ b/tools/objtool/arch/x86/include/asm/inat.h
+@@ -20,7 +20,7 @@
+  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+  *
+  */
+-#include "inat_types.h"
++#include <asm/inat_types.h>
+ 
+ /*
+  * Internal bits. Don't use bitmasks directly, because these bits are
+diff --git a/tools/objtool/arch/x86/insn/inat_types.h b/tools/objtool/arch/x86/include/asm/inat_types.h
+similarity index 100%
+rename from tools/objtool/arch/x86/insn/inat_types.h
+rename to tools/objtool/arch/x86/include/asm/inat_types.h
+diff --git a/tools/objtool/arch/x86/insn/insn.h b/tools/objtool/arch/x86/include/asm/insn.h
+similarity index 99%
+rename from tools/objtool/arch/x86/insn/insn.h
+rename to tools/objtool/arch/x86/include/asm/insn.h
+index e23578c7b1be..b3e32b010ab1 100644
+--- a/tools/objtool/arch/x86/insn/insn.h
++++ b/tools/objtool/arch/x86/include/asm/insn.h
+@@ -21,7 +21,7 @@
+  */
+ 
+ /* insn_attr_t is defined in inat.h */
+-#include "inat.h"
++#include <asm/inat.h>
+ 
+ struct insn_field {
+       union {
+diff --git a/tools/objtool/orc_types.h b/tools/objtool/arch/x86/include/asm/orc_types.h
+similarity index 100%
+rename from tools/objtool/orc_types.h
+rename to tools/objtool/arch/x86/include/asm/orc_types.h
+diff --git a/tools/objtool/orc.h b/tools/objtool/orc.h
+index a4139e386ef3..b0e92a6d0903 100644
+--- a/tools/objtool/orc.h
++++ b/tools/objtool/orc.h
+@@ -18,7 +18,7 @@
+ #ifndef _ORC_H
+ #define _ORC_H
+ 
+-#include "orc_types.h"
++#include <asm/orc_types.h>
+ 
+ struct objtool_file;
+ 
+diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
+index 4559a21a8de2..92f57996d66c 100644
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -19,9 +19,9 @@
+ #include <stdlib.h>
+ 
+ #define unlikely(cond) (cond)
+-#include "insn/insn.h"
+-#include "insn/inat.c"
+-#include "insn/insn.c"
++#include <asm/insn.h>
++#include "lib/inat.c"
++#include "lib/insn.c"
+ 
+ #include "../../elf.h"
+ #include "../../arch.h"
+diff --git a/tools/objtool/arch/x86/insn/inat.c b/tools/objtool/arch/x86/lib/inat.c
+similarity index 99%
+rename from tools/objtool/arch/x86/insn/inat.c
+rename to tools/objtool/arch/x86/lib/inat.c
+index e4bf28e6f4c7..c1f01a8e9f65 100644
+--- a/tools/objtool/arch/x86/insn/inat.c
++++ b/tools/objtool/arch/x86/lib/inat.c
+@@ -18,7 +18,7 @@
+  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+  *
+  */
+-#include "insn.h"
++#include <asm/insn.h>
+ 
+ /* Attribute tables are generated from opcode map */
+ #include "inat-tables.c"
+diff --git a/tools/objtool/arch/x86/insn/insn.c b/tools/objtool/arch/x86/lib/insn.c
+similarity index 99%
+rename from tools/objtool/arch/x86/insn/insn.c
+rename to tools/objtool/arch/x86/lib/insn.c
+index ca983e2bea8b..1088eb8f3a5f 100644
+--- a/tools/objtool/arch/x86/insn/insn.c
++++ b/tools/objtool/arch/x86/lib/insn.c
+@@ -23,8 +23,8 @@
+ #else
+ #include <string.h>
+ #endif
+-#include "inat.h"
+-#include "insn.h"
++#include <asm/inat.h>
++#include <asm/insn.h>
+ 
+ /* Verify next sizeof(t) bytes can be on the same instruction */
+ #define validate_next(t, insn, n)     \
+diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore
+index d3102c865a95..914cff12899b 100644
+--- a/tools/objtool/.gitignore
++++ b/tools/objtool/.gitignore
+@@ -1,3 +1,3 @@
+-arch/x86/insn/inat-tables.c
++arch/x86/lib/inat-tables.c
+ objtool
+ fixdep
+diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build
+index debbdb0b5c43..b998412c017d 100644
+--- a/tools/objtool/arch/x86/Build
++++ b/tools/objtool/arch/x86/Build
+@@ -1,12 +1,12 @@
+ objtool-y += decode.o
+ 
+-inat_tables_script = arch/x86/insn/gen-insn-attr-x86.awk
+-inat_tables_maps = arch/x86/insn/x86-opcode-map.txt
++inat_tables_script = arch/x86/tools/gen-insn-attr-x86.awk
++inat_tables_maps = arch/x86/lib/x86-opcode-map.txt
+ 
+-$(OUTPUT)arch/x86/insn/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
++$(OUTPUT)arch/x86/lib/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
+       $(call rule_mkdir)
+       $(Q)$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@
+ 
+-$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/insn/inat-tables.c
++$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/lib/inat-tables.c
+ 
+-CFLAGS_decode.o += -I$(OUTPUT)arch/x86/insn
++CFLAGS_decode.o += -I$(OUTPUT)arch/x86/lib
+diff --git a/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk
+similarity index 100%
+rename from tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk
+rename to tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk
+-- 
+2.14.2
+
diff --git a/patches/kernel/0133-objtool-Fix-cross-build.patch b/patches/kernel/0133-objtool-Fix-cross-build.patch

deleted file mode 100644 (file)

index 7e8ae7c..0000000
--- a/patches/kernel/0133-objtool-Fix-cross-build.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Tue, 7 Nov 2017 21:01:52 -0600
-Subject: [PATCH] objtool: Fix cross-build
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Stephen Rothwell reported this cross-compilation build failure:
-
-| In file included from orc_dump.c:19:0:
-| orc.h:21:10: fatal error: asm/orc_types.h: No such file or directory
-| ...
-
-Caused by:
-
-  6a77cff819ae ("objtool: Move synced files to their original relative locations")
-
-Use the proper arch header files location, not the host-arch location.
-
-Bisected-by: Stephen Rothwell <sfr@canb.auug.org.au>
-Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Linux-Next Mailing List <linux-next@vger.kernel.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/20171108030152.bd76eahiwjwjt3kp@treble
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit 26bda786fb129698d96c9bc6d243f7a3cd3fc668)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit fbe7b2a70b2949ec3ba359c04fb60d8f31f74e04)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- tools/objtool/Makefile | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
-index 90b0241f3ccc..847e99aa54ea 100644
---- a/tools/objtool/Makefile
-+++ b/tools/objtool/Makefile
-@@ -26,7 +26,7 @@ all: $(OBJTOOL)
- 
- INCLUDES := -I$(srctree)/tools/include \
-           -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
--          -I$(srctree)/tools/objtool/arch/$(HOSTARCH)/include
-+          -I$(srctree)/tools/objtool/arch/$(ARCH)/include
- CFLAGS   += -Wall -Werror $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -fomit-frame-pointer -O2 -g $(INCLUDES)
- LDFLAGS  += -lelf $(LIBSUBCMD)
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0133-objtool-Move-kernel-headers-code-sync-check-to-a-scr.patch b/patches/kernel/0133-objtool-Move-kernel-headers-code-sync-check-to-a-scr.patch

new file mode 100644 (file)

index 0000000..5ebc193
--- /dev/null
+++ b/patches/kernel/0133-objtool-Move-kernel-headers-code-sync-check-to-a-scr.patch
@@ -0,0 +1,99 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 6 Nov 2017 07:21:51 -0600
+Subject: [PATCH] objtool: Move kernel headers/code sync check to a script
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Replace the nasty diff checks in the objtool Makefile with a clean bash
+script, and make the warnings more specific.
+
+Heavily inspired by tools/perf/check-headers.sh.
+
+Suggested-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/ab015f15ccd8c0c6008493c3c6ee3d495eaf2927.1509974346.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit a89ec413c623eb2870180bcad678046bf7bc8465)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 4e72ce95a057e744b8d580239e2d8afa51118d82)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ tools/objtool/Makefile      | 16 +---------------
+ tools/objtool/sync-check.sh | 29 +++++++++++++++++++++++++++++
+ 2 files changed, 30 insertions(+), 15 deletions(-)
+ create mode 100755 tools/objtool/sync-check.sh
+
+diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
+index f95f48e445c3..90b0241f3ccc 100644
+--- a/tools/objtool/Makefile
++++ b/tools/objtool/Makefile
+@@ -41,22 +41,8 @@ include $(srctree)/tools/build/Makefile.include
+ $(OBJTOOL_IN): fixdep FORCE
+       @$(MAKE) $(build)=objtool
+ 
+-# Busybox's diff doesn't have -I, avoid warning in that case
+-#
+ $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
+-      @(diff -I 2>&1 | grep -q 'option requires an argument' && \
+-      test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \
+-      diff arch/x86/lib/insn.c ../../arch/x86/lib/insn.c >/dev/null && \
+-      diff arch/x86/lib/inat.c ../../arch/x86/lib/inat.c >/dev/null && \
+-      diff arch/x86/lib/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \
+-      diff arch/x86/tools/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \
+-      diff arch/x86/include/asm/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \
+-      diff arch/x86/include/asm/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \
+-      diff arch/x86/include/asm/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \
+-      || echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true
+-      @(test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \
+-      diff ../../arch/x86/include/asm/orc_types.h arch/x86/include/asm/orc_types.h >/dev/null) \
+-      || echo "warning: objtool: orc_types.h differs from kernel" >&2 )) || true
++      @./sync-check.sh
+       $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
+ 
+ 
+diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
+new file mode 100755
+index 000000000000..1470e74e9d66
+--- /dev/null
++++ b/tools/objtool/sync-check.sh
+@@ -0,0 +1,29 @@
++#!/bin/sh
++# SPDX-License-Identifier: GPL-2.0
++
++FILES='
++arch/x86/lib/insn.c
++arch/x86/lib/inat.c
++arch/x86/lib/x86-opcode-map.txt
++arch/x86/tools/gen-insn-attr-x86.awk
++arch/x86/include/asm/insn.h
++arch/x86/include/asm/inat.h
++arch/x86/include/asm/inat_types.h
++arch/x86/include/asm/orc_types.h
++'
++
++check()
++{
++      local file=$1
++
++      diff $file ../../$file > /dev/null ||
++              echo "Warning: synced file at 'tools/objtool/$file' differs from latest kernel version at '$file'"
++}
++
++if [ ! -d ../../kernel ] || [ ! -d ../../tools ] || [ ! -d ../objtool ]; then
++      exit 0
++fi
++
++for i in $FILES; do
++  check $i
++done
+-- 
+2.14.2
+
diff --git a/patches/kernel/0134-objtool-Fix-cross-build.patch b/patches/kernel/0134-objtool-Fix-cross-build.patch

new file mode 100644 (file)

index 0000000..7e8ae7c
--- /dev/null
+++ b/patches/kernel/0134-objtool-Fix-cross-build.patch
@@ -0,0 +1,56 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Tue, 7 Nov 2017 21:01:52 -0600
+Subject: [PATCH] objtool: Fix cross-build
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Stephen Rothwell reported this cross-compilation build failure:
+
+| In file included from orc_dump.c:19:0:
+| orc.h:21:10: fatal error: asm/orc_types.h: No such file or directory
+| ...
+
+Caused by:
+
+  6a77cff819ae ("objtool: Move synced files to their original relative locations")
+
+Use the proper arch header files location, not the host-arch location.
+
+Bisected-by: Stephen Rothwell <sfr@canb.auug.org.au>
+Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Linux-Next Mailing List <linux-next@vger.kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20171108030152.bd76eahiwjwjt3kp@treble
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit 26bda786fb129698d96c9bc6d243f7a3cd3fc668)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit fbe7b2a70b2949ec3ba359c04fb60d8f31f74e04)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ tools/objtool/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
+index 90b0241f3ccc..847e99aa54ea 100644
+--- a/tools/objtool/Makefile
++++ b/tools/objtool/Makefile
+@@ -26,7 +26,7 @@ all: $(OBJTOOL)
+ 
+ INCLUDES := -I$(srctree)/tools/include \
+           -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
+-          -I$(srctree)/tools/objtool/arch/$(HOSTARCH)/include
++          -I$(srctree)/tools/objtool/arch/$(ARCH)/include
+ CFLAGS   += -Wall -Werror $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -fomit-frame-pointer -O2 -g $(INCLUDES)
+ LDFLAGS  += -lelf $(LIBSUBCMD)
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0134-tools-headers-Sync-objtool-UAPI-header.patch b/patches/kernel/0134-tools-headers-Sync-objtool-UAPI-header.patch

deleted file mode 100644 (file)

index 29ce403..0000000
--- a/patches/kernel/0134-tools-headers-Sync-objtool-UAPI-header.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Ingo Molnar <mingo@kernel.org>
-Date: Tue, 14 Nov 2017 07:24:22 +0100
-Subject: [PATCH] tools/headers: Sync objtool UAPI header
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-objtool grew this new warning:
-
-  Warning: synced file at 'tools/objtool/arch/x86/include/asm/inat.h' differs from latest kernel version at 'arch/x86/include/asm/inat.h'
-
-which upstream header grew new INAT_SEG_* definitions.
-
-Sync up the tooling version of the header.
-
-Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-kernel@vger.kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 1ca1d1e5618960574fb01507dbab07e5337049a1)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 3dd05d51e337e9d780fb0e7c46d7216a79380d7b)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- tools/objtool/arch/x86/include/asm/inat.h | 10 ++++++++++
- 1 file changed, 10 insertions(+)
-
-diff --git a/tools/objtool/arch/x86/include/asm/inat.h b/tools/objtool/arch/x86/include/asm/inat.h
-index 02aff0867211..1c78580e58be 100644
---- a/tools/objtool/arch/x86/include/asm/inat.h
-+++ b/tools/objtool/arch/x86/include/asm/inat.h
-@@ -97,6 +97,16 @@
- #define INAT_MAKE_GROUP(grp)  ((grp << INAT_GRP_OFFS) | INAT_MODRM)
- #define INAT_MAKE_IMM(imm)    (imm << INAT_IMM_OFFS)
- 
-+/* Identifiers for segment registers */
-+#define INAT_SEG_REG_IGNORE   0
-+#define INAT_SEG_REG_DEFAULT  1
-+#define INAT_SEG_REG_CS               2
-+#define INAT_SEG_REG_SS               3
-+#define INAT_SEG_REG_DS               4
-+#define INAT_SEG_REG_ES               5
-+#define INAT_SEG_REG_FS               6
-+#define INAT_SEG_REG_GS               7
-+
- /* Attribute search APIs */
- extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
- extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
--- 
-2.14.2
-
diff --git a/patches/kernel/0135-objtool-Fix-64-bit-build-on-32-bit-host.patch b/patches/kernel/0135-objtool-Fix-64-bit-build-on-32-bit-host.patch

deleted file mode 100644 (file)

index 2a4d103..0000000
--- a/patches/kernel/0135-objtool-Fix-64-bit-build-on-32-bit-host.patch
+++ /dev/null
@@ -1,103 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Mikulas Patocka <mpatocka@redhat.com>
-Date: Sat, 2 Dec 2017 16:17:44 -0600
-Subject: [PATCH] objtool: Fix 64-bit build on 32-bit host
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The new ORC unwinder breaks the build of a 64-bit kernel on a 32-bit
-host.  Building the kernel on a i386 or x32 host fails with:
-
-  orc_dump.c: In function 'orc_dump':
-  orc_dump.c:105:26: error: passing argument 2 of 'elf_getshdrnum' from incompatible pointer type [-Werror=incompatible-pointer-types]
-    if (elf_getshdrnum(elf, &nr_sections)) {
-                            ^
-  In file included from /usr/local/include/gelf.h:32:0,
-                   from elf.h:22,
-                   from warn.h:26,
-                   from orc_dump.c:20:
-  /usr/local/include/libelf.h:304:12: note: expected 'size_t * {aka unsigned int *}' but argument is of type 'long unsigned int *'
-   extern int elf_getshdrnum (Elf *__elf, size_t *__dst);
-              ^~~~~~~~~~~~~~
-  orc_dump.c:190:17: error: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'Elf64_Sxword {aka long long int}' [-Werror=format=]
-      printf("%s+%lx:", name, rela.r_addend);
-                 ~~^          ~~~~~~~~~~~~~
-                 %llx
-
-Fix the build failure.
-
-Another problem is that if the user specifies HOSTCC or HOSTLD
-variables, they are ignored in the objtool makefile.  Change the
-Makefile to respect these variables.
-
-Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Sven Joachim <svenjoac@gmx.de>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Fixes: 627fce14809b ("objtool: Add ORC unwind table generation")
-Link: http://lkml.kernel.org/r/19f0e64d8e07e30a7b307cd010eb780c404fe08d.1512252895.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 0db897fb081b66c26a338e5481f317c71df779c9)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 979c9a5cacd1d40d08c1c24ed5c5810cf7f3069c)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- tools/objtool/Makefile   | 8 +++++---
- tools/objtool/orc_dump.c | 7 ++++---
- 2 files changed, 9 insertions(+), 6 deletions(-)
-
-diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
-index 847e99aa54ea..5c71bae01064 100644
---- a/tools/objtool/Makefile
-+++ b/tools/objtool/Makefile
-@@ -6,9 +6,11 @@ ARCH := x86
- endif
- 
- # always use the host compiler
--CC = gcc
--LD = ld
--AR = ar
-+HOSTCC        ?= gcc
-+HOSTLD        ?= ld
-+CC     = $(HOSTCC)
-+LD     = $(HOSTLD)
-+AR     = ar
- 
- ifeq ($(srctree),)
- srctree := $(patsubst %/,%,$(dir $(CURDIR)))
-diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
-index 36c5bf6a2675..c3343820916a 100644
---- a/tools/objtool/orc_dump.c
-+++ b/tools/objtool/orc_dump.c
-@@ -76,7 +76,8 @@ int orc_dump(const char *_objname)
-       int fd, nr_entries, i, *orc_ip = NULL, orc_size = 0;
-       struct orc_entry *orc = NULL;
-       char *name;
--      unsigned long nr_sections, orc_ip_addr = 0;
-+      size_t nr_sections;
-+      Elf64_Addr orc_ip_addr = 0;
-       size_t shstrtab_idx;
-       Elf *elf;
-       Elf_Scn *scn;
-@@ -187,10 +188,10 @@ int orc_dump(const char *_objname)
-                               return -1;
-                       }
- 
--                      printf("%s+%lx:", name, rela.r_addend);
-+                      printf("%s+%llx:", name, (unsigned long long)rela.r_addend);
- 
-               } else {
--                      printf("%lx:", orc_ip_addr + (i * sizeof(int)) + orc_ip[i]);
-+                      printf("%llx:", (unsigned long long)(orc_ip_addr + (i * sizeof(int)) + orc_ip[i]));
-               }
- 
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0135-tools-headers-Sync-objtool-UAPI-header.patch b/patches/kernel/0135-tools-headers-Sync-objtool-UAPI-header.patch

new file mode 100644 (file)

index 0000000..29ce403
--- /dev/null
+++ b/patches/kernel/0135-tools-headers-Sync-objtool-UAPI-header.patch
@@ -0,0 +1,57 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Ingo Molnar <mingo@kernel.org>
+Date: Tue, 14 Nov 2017 07:24:22 +0100
+Subject: [PATCH] tools/headers: Sync objtool UAPI header
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+objtool grew this new warning:
+
+  Warning: synced file at 'tools/objtool/arch/x86/include/asm/inat.h' differs from latest kernel version at 'arch/x86/include/asm/inat.h'
+
+which upstream header grew new INAT_SEG_* definitions.
+
+Sync up the tooling version of the header.
+
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 1ca1d1e5618960574fb01507dbab07e5337049a1)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 3dd05d51e337e9d780fb0e7c46d7216a79380d7b)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ tools/objtool/arch/x86/include/asm/inat.h | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/tools/objtool/arch/x86/include/asm/inat.h b/tools/objtool/arch/x86/include/asm/inat.h
+index 02aff0867211..1c78580e58be 100644
+--- a/tools/objtool/arch/x86/include/asm/inat.h
++++ b/tools/objtool/arch/x86/include/asm/inat.h
+@@ -97,6 +97,16 @@
+ #define INAT_MAKE_GROUP(grp)  ((grp << INAT_GRP_OFFS) | INAT_MODRM)
+ #define INAT_MAKE_IMM(imm)    (imm << INAT_IMM_OFFS)
+ 
++/* Identifiers for segment registers */
++#define INAT_SEG_REG_IGNORE   0
++#define INAT_SEG_REG_DEFAULT  1
++#define INAT_SEG_REG_CS               2
++#define INAT_SEG_REG_SS               3
++#define INAT_SEG_REG_DS               4
++#define INAT_SEG_REG_ES               5
++#define INAT_SEG_REG_FS               6
++#define INAT_SEG_REG_GS               7
++
+ /* Attribute search APIs */
+ extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+ extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0136-objtool-Fix-64-bit-build-on-32-bit-host.patch b/patches/kernel/0136-objtool-Fix-64-bit-build-on-32-bit-host.patch

new file mode 100644 (file)

index 0000000..2a4d103
--- /dev/null
+++ b/patches/kernel/0136-objtool-Fix-64-bit-build-on-32-bit-host.patch
@@ -0,0 +1,103 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Sat, 2 Dec 2017 16:17:44 -0600
+Subject: [PATCH] objtool: Fix 64-bit build on 32-bit host
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The new ORC unwinder breaks the build of a 64-bit kernel on a 32-bit
+host.  Building the kernel on a i386 or x32 host fails with:
+
+  orc_dump.c: In function 'orc_dump':
+  orc_dump.c:105:26: error: passing argument 2 of 'elf_getshdrnum' from incompatible pointer type [-Werror=incompatible-pointer-types]
+    if (elf_getshdrnum(elf, &nr_sections)) {
+                            ^
+  In file included from /usr/local/include/gelf.h:32:0,
+                   from elf.h:22,
+                   from warn.h:26,
+                   from orc_dump.c:20:
+  /usr/local/include/libelf.h:304:12: note: expected 'size_t * {aka unsigned int *}' but argument is of type 'long unsigned int *'
+   extern int elf_getshdrnum (Elf *__elf, size_t *__dst);
+              ^~~~~~~~~~~~~~
+  orc_dump.c:190:17: error: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'Elf64_Sxword {aka long long int}' [-Werror=format=]
+      printf("%s+%lx:", name, rela.r_addend);
+                 ~~^          ~~~~~~~~~~~~~
+                 %llx
+
+Fix the build failure.
+
+Another problem is that if the user specifies HOSTCC or HOSTLD
+variables, they are ignored in the objtool makefile.  Change the
+Makefile to respect these variables.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Sven Joachim <svenjoac@gmx.de>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Fixes: 627fce14809b ("objtool: Add ORC unwind table generation")
+Link: http://lkml.kernel.org/r/19f0e64d8e07e30a7b307cd010eb780c404fe08d.1512252895.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 0db897fb081b66c26a338e5481f317c71df779c9)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 979c9a5cacd1d40d08c1c24ed5c5810cf7f3069c)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ tools/objtool/Makefile   | 8 +++++---
+ tools/objtool/orc_dump.c | 7 ++++---
+ 2 files changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
+index 847e99aa54ea..5c71bae01064 100644
+--- a/tools/objtool/Makefile
++++ b/tools/objtool/Makefile
+@@ -6,9 +6,11 @@ ARCH := x86
+ endif
+ 
+ # always use the host compiler
+-CC = gcc
+-LD = ld
+-AR = ar
++HOSTCC        ?= gcc
++HOSTLD        ?= ld
++CC     = $(HOSTCC)
++LD     = $(HOSTLD)
++AR     = ar
+ 
+ ifeq ($(srctree),)
+ srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
+index 36c5bf6a2675..c3343820916a 100644
+--- a/tools/objtool/orc_dump.c
++++ b/tools/objtool/orc_dump.c
+@@ -76,7 +76,8 @@ int orc_dump(const char *_objname)
+       int fd, nr_entries, i, *orc_ip = NULL, orc_size = 0;
+       struct orc_entry *orc = NULL;
+       char *name;
+-      unsigned long nr_sections, orc_ip_addr = 0;
++      size_t nr_sections;
++      Elf64_Addr orc_ip_addr = 0;
+       size_t shstrtab_idx;
+       Elf *elf;
+       Elf_Scn *scn;
+@@ -187,10 +188,10 @@ int orc_dump(const char *_objname)
+                               return -1;
+                       }
+ 
+-                      printf("%s+%lx:", name, rela.r_addend);
++                      printf("%s+%llx:", name, (unsigned long long)rela.r_addend);
+ 
+               } else {
+-                      printf("%lx:", orc_ip_addr + (i * sizeof(int)) + orc_ip[i]);
++                      printf("%llx:", (unsigned long long)(orc_ip_addr + (i * sizeof(int)) + orc_ip[i]));
+               }
+ 
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0136-x86-decoder-Fix-and-update-the-opcodes-map.patch b/patches/kernel/0136-x86-decoder-Fix-and-update-the-opcodes-map.patch

deleted file mode 100644 (file)

index ec75390..0000000
--- a/patches/kernel/0136-x86-decoder-Fix-and-update-the-opcodes-map.patch
+++ /dev/null
@@ -1,171 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Randy Dunlap <rdunlap@infradead.org>
-Date: Mon, 11 Dec 2017 10:38:36 -0800
-Subject: [PATCH] x86/decoder: Fix and update the opcodes map
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Update x86-opcode-map.txt based on the October 2017 Intel SDM publication.
-Fix INVPID to INVVPID.
-Add UD0 and UD1 instruction opcodes.
-
-Also sync the objtool and perf tooling copies of this file.
-
-Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
-Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Masami Hiramatsu <masami.hiramatsu@gmail.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Link: http://lkml.kernel.org/r/aac062d7-c0f6-96e3-5c92-ed299e2bd3da@infradead.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit f5395545058cd388da5d99bda3dedd2a2fe56dbc)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit f88b977dde8156d6c4514114baa0eed05dd48e41)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/lib/x86-opcode-map.txt                     | 13 +++++++++++--
- tools/objtool/arch/x86/lib/x86-opcode-map.txt       | 15 ++++++++++++---
- tools/perf/util/intel-pt-decoder/x86-opcode-map.txt | 15 ++++++++++++---
- 3 files changed, 35 insertions(+), 8 deletions(-)
-
-diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
-index 12e377184ee4..aa2270dc9e87 100644
---- a/arch/x86/lib/x86-opcode-map.txt
-+++ b/arch/x86/lib/x86-opcode-map.txt
-@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
- fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
- fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
- fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
--ff:
-+ff: UD0
- EndTable
- 
- Table: 3-byte opcode 1 (0x0f 0x38)
-@@ -717,7 +717,7 @@ AVXcode: 2
- 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
- 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
- 80: INVEPT Gy,Mdq (66)
--81: INVPID Gy,Mdq (66)
-+81: INVVPID Gy,Mdq (66)
- 82: INVPCID Gy,Mdq (66)
- 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
- 88: vexpandps/d Vpd,Wpd (66),(ev)
-@@ -970,6 +970,15 @@ GrpTable: Grp9
- EndTable
- 
- GrpTable: Grp10
-+# all are UD1
-+0: UD1
-+1: UD1
-+2: UD1
-+3: UD1
-+4: UD1
-+5: UD1
-+6: UD1
-+7: UD1
- EndTable
- 
- # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
-diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
-index 12e377184ee4..e0b85930dd77 100644
---- a/tools/objtool/arch/x86/lib/x86-opcode-map.txt
-+++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
-@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
- fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
- fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
- fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
--ff:
-+ff: UD0
- EndTable
- 
- Table: 3-byte opcode 1 (0x0f 0x38)
-@@ -717,7 +717,7 @@ AVXcode: 2
- 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
- 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
- 80: INVEPT Gy,Mdq (66)
--81: INVPID Gy,Mdq (66)
-+81: INVVPID Gy,Mdq (66)
- 82: INVPCID Gy,Mdq (66)
- 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
- 88: vexpandps/d Vpd,Wpd (66),(ev)
-@@ -896,7 +896,7 @@ EndTable
- 
- GrpTable: Grp3_1
- 0: TEST Eb,Ib
--1:
-+1: TEST Eb,Ib
- 2: NOT Eb
- 3: NEG Eb
- 4: MUL AL,Eb
-@@ -970,6 +970,15 @@ GrpTable: Grp9
- EndTable
- 
- GrpTable: Grp10
-+# all are UD1
-+0: UD1
-+1: UD1
-+2: UD1
-+3: UD1
-+4: UD1
-+5: UD1
-+6: UD1
-+7: UD1
- EndTable
- 
- # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
-diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
-index 12e377184ee4..e0b85930dd77 100644
---- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
-+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
-@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
- fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
- fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
- fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
--ff:
-+ff: UD0
- EndTable
- 
- Table: 3-byte opcode 1 (0x0f 0x38)
-@@ -717,7 +717,7 @@ AVXcode: 2
- 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
- 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
- 80: INVEPT Gy,Mdq (66)
--81: INVPID Gy,Mdq (66)
-+81: INVVPID Gy,Mdq (66)
- 82: INVPCID Gy,Mdq (66)
- 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
- 88: vexpandps/d Vpd,Wpd (66),(ev)
-@@ -896,7 +896,7 @@ EndTable
- 
- GrpTable: Grp3_1
- 0: TEST Eb,Ib
--1:
-+1: TEST Eb,Ib
- 2: NOT Eb
- 3: NEG Eb
- 4: MUL AL,Eb
-@@ -970,6 +970,15 @@ GrpTable: Grp9
- EndTable
- 
- GrpTable: Grp10
-+# all are UD1
-+0: UD1
-+1: UD1
-+2: UD1
-+3: UD1
-+4: UD1
-+5: UD1
-+6: UD1
-+7: UD1
- EndTable
- 
- # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
--- 
-2.14.2
-
diff --git a/patches/kernel/0137-x86-decoder-Add-new-TEST-instruction-pattern.patch b/patches/kernel/0137-x86-decoder-Add-new-TEST-instruction-pattern.patch

deleted file mode 100644 (file)

index d459a14..0000000
--- a/patches/kernel/0137-x86-decoder-Add-new-TEST-instruction-pattern.patch
+++ /dev/null
@@ -1,68 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Masami Hiramatsu <mhiramat@kernel.org>
-Date: Fri, 24 Nov 2017 13:56:30 +0900
-Subject: [PATCH] x86/decoder: Add new TEST instruction pattern
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The kbuild test robot reported this build warning:
-
-  Warning: arch/x86/tools/test_get_len found difference at <jump_table>:ffffffff8103dd2c
-
-  Warning: ffffffff8103dd82: f6 09 d8 testb $0xd8,(%rcx)
-  Warning: objdump says 3 bytes, but insn_get_length() says 2
-  Warning: decoded and checked 1569014 instructions with 1 warnings
-
-This sequence seems to be a new instruction not in the opcode map in the Intel SDM.
-
-The instruction sequence is "F6 09 d8", means Group3(F6), MOD(00)REG(001)RM(001), and 0xd8.
-Intel SDM vol2 A.4 Table A-6 said the table index in the group is "Encoding of Bits 5,4,3 of
-the ModR/M Byte (bits 2,1,0 in parenthesis)"
-
-In that table, opcodes listed by the index REG bits as:
-
-  000         001       010 011  100        101        110         111
- TEST Ib/Iz,(undefined),NOT,NEG,MUL AL/rAX,IMUL AL/rAX,DIV AL/rAX,IDIV AL/rAX
-
-So, it seems TEST Ib is assigned to 001.
-
-Add the new pattern.
-
-Reported-by: kbuild test robot <fengguang.wu@intel.com>
-Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
-Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Cc: <stable@vger.kernel.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-kernel@vger.kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 2cf68f74af0a6cf808ad03f0d528c72b03c89cc7)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 8896d68f8ff2a97b91279221ddaba73664c5161d)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/lib/x86-opcode-map.txt | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
-index aa2270dc9e87..e0b85930dd77 100644
---- a/arch/x86/lib/x86-opcode-map.txt
-+++ b/arch/x86/lib/x86-opcode-map.txt
-@@ -896,7 +896,7 @@ EndTable
- 
- GrpTable: Grp3_1
- 0: TEST Eb,Ib
--1:
-+1: TEST Eb,Ib
- 2: NOT Eb
- 3: NEG Eb
- 4: MUL AL,Eb
--- 
-2.14.2
-
diff --git a/patches/kernel/0137-x86-decoder-Fix-and-update-the-opcodes-map.patch b/patches/kernel/0137-x86-decoder-Fix-and-update-the-opcodes-map.patch

new file mode 100644 (file)

index 0000000..ec75390
--- /dev/null
+++ b/patches/kernel/0137-x86-decoder-Fix-and-update-the-opcodes-map.patch
@@ -0,0 +1,171 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Randy Dunlap <rdunlap@infradead.org>
+Date: Mon, 11 Dec 2017 10:38:36 -0800
+Subject: [PATCH] x86/decoder: Fix and update the opcodes map
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Update x86-opcode-map.txt based on the October 2017 Intel SDM publication.
+Fix INVPID to INVVPID.
+Add UD0 and UD1 instruction opcodes.
+
+Also sync the objtool and perf tooling copies of this file.
+
+Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
+Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Masami Hiramatsu <masami.hiramatsu@gmail.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/aac062d7-c0f6-96e3-5c92-ed299e2bd3da@infradead.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit f5395545058cd388da5d99bda3dedd2a2fe56dbc)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit f88b977dde8156d6c4514114baa0eed05dd48e41)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/lib/x86-opcode-map.txt                     | 13 +++++++++++--
+ tools/objtool/arch/x86/lib/x86-opcode-map.txt       | 15 ++++++++++++---
+ tools/perf/util/intel-pt-decoder/x86-opcode-map.txt | 15 ++++++++++++---
+ 3 files changed, 35 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
+index 12e377184ee4..aa2270dc9e87 100644
+--- a/arch/x86/lib/x86-opcode-map.txt
++++ b/arch/x86/lib/x86-opcode-map.txt
+@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
+ fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
+ fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
+ fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
+-ff:
++ff: UD0
+ EndTable
+ 
+ Table: 3-byte opcode 1 (0x0f 0x38)
+@@ -717,7 +717,7 @@ AVXcode: 2
+ 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
+ 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
+ 80: INVEPT Gy,Mdq (66)
+-81: INVPID Gy,Mdq (66)
++81: INVVPID Gy,Mdq (66)
+ 82: INVPCID Gy,Mdq (66)
+ 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
+ 88: vexpandps/d Vpd,Wpd (66),(ev)
+@@ -970,6 +970,15 @@ GrpTable: Grp9
+ EndTable
+ 
+ GrpTable: Grp10
++# all are UD1
++0: UD1
++1: UD1
++2: UD1
++3: UD1
++4: UD1
++5: UD1
++6: UD1
++7: UD1
+ EndTable
+ 
+ # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
+diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
+index 12e377184ee4..e0b85930dd77 100644
+--- a/tools/objtool/arch/x86/lib/x86-opcode-map.txt
++++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
+@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
+ fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
+ fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
+ fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
+-ff:
++ff: UD0
+ EndTable
+ 
+ Table: 3-byte opcode 1 (0x0f 0x38)
+@@ -717,7 +717,7 @@ AVXcode: 2
+ 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
+ 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
+ 80: INVEPT Gy,Mdq (66)
+-81: INVPID Gy,Mdq (66)
++81: INVVPID Gy,Mdq (66)
+ 82: INVPCID Gy,Mdq (66)
+ 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
+ 88: vexpandps/d Vpd,Wpd (66),(ev)
+@@ -896,7 +896,7 @@ EndTable
+ 
+ GrpTable: Grp3_1
+ 0: TEST Eb,Ib
+-1:
++1: TEST Eb,Ib
+ 2: NOT Eb
+ 3: NEG Eb
+ 4: MUL AL,Eb
+@@ -970,6 +970,15 @@ GrpTable: Grp9
+ EndTable
+ 
+ GrpTable: Grp10
++# all are UD1
++0: UD1
++1: UD1
++2: UD1
++3: UD1
++4: UD1
++5: UD1
++6: UD1
++7: UD1
+ EndTable
+ 
+ # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
+diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
+index 12e377184ee4..e0b85930dd77 100644
+--- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
++++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
+@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
+ fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
+ fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
+ fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
+-ff:
++ff: UD0
+ EndTable
+ 
+ Table: 3-byte opcode 1 (0x0f 0x38)
+@@ -717,7 +717,7 @@ AVXcode: 2
+ 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
+ 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
+ 80: INVEPT Gy,Mdq (66)
+-81: INVPID Gy,Mdq (66)
++81: INVVPID Gy,Mdq (66)
+ 82: INVPCID Gy,Mdq (66)
+ 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
+ 88: vexpandps/d Vpd,Wpd (66),(ev)
+@@ -896,7 +896,7 @@ EndTable
+ 
+ GrpTable: Grp3_1
+ 0: TEST Eb,Ib
+-1:
++1: TEST Eb,Ib
+ 2: NOT Eb
+ 3: NEG Eb
+ 4: MUL AL,Eb
+@@ -970,6 +970,15 @@ GrpTable: Grp9
+ EndTable
+ 
+ GrpTable: Grp10
++# all are UD1
++0: UD1
++1: UD1
++2: UD1
++3: UD1
++4: UD1
++5: UD1
++6: UD1
++7: UD1
+ EndTable
+ 
+ # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
+-- 
+2.14.2
+
diff --git a/patches/kernel/0138-x86-decoder-Add-new-TEST-instruction-pattern.patch b/patches/kernel/0138-x86-decoder-Add-new-TEST-instruction-pattern.patch

new file mode 100644 (file)

index 0000000..d459a14
--- /dev/null
+++ b/patches/kernel/0138-x86-decoder-Add-new-TEST-instruction-pattern.patch
@@ -0,0 +1,68 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Fri, 24 Nov 2017 13:56:30 +0900
+Subject: [PATCH] x86/decoder: Add new TEST instruction pattern
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The kbuild test robot reported this build warning:
+
+  Warning: arch/x86/tools/test_get_len found difference at <jump_table>:ffffffff8103dd2c
+
+  Warning: ffffffff8103dd82: f6 09 d8 testb $0xd8,(%rcx)
+  Warning: objdump says 3 bytes, but insn_get_length() says 2
+  Warning: decoded and checked 1569014 instructions with 1 warnings
+
+This sequence seems to be a new instruction not in the opcode map in the Intel SDM.
+
+The instruction sequence is "F6 09 d8", means Group3(F6), MOD(00)REG(001)RM(001), and 0xd8.
+Intel SDM vol2 A.4 Table A-6 said the table index in the group is "Encoding of Bits 5,4,3 of
+the ModR/M Byte (bits 2,1,0 in parenthesis)"
+
+In that table, opcodes listed by the index REG bits as:
+
+  000         001       010 011  100        101        110         111
+ TEST Ib/Iz,(undefined),NOT,NEG,MUL AL/rAX,IMUL AL/rAX,DIV AL/rAX,IDIV AL/rAX
+
+So, it seems TEST Ib is assigned to 001.
+
+Add the new pattern.
+
+Reported-by: kbuild test robot <fengguang.wu@intel.com>
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: <stable@vger.kernel.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 2cf68f74af0a6cf808ad03f0d528c72b03c89cc7)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 8896d68f8ff2a97b91279221ddaba73664c5161d)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/lib/x86-opcode-map.txt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
+index aa2270dc9e87..e0b85930dd77 100644
+--- a/arch/x86/lib/x86-opcode-map.txt
++++ b/arch/x86/lib/x86-opcode-map.txt
+@@ -896,7 +896,7 @@ EndTable
+ 
+ GrpTable: Grp3_1
+ 0: TEST Eb,Ib
+-1:
++1: TEST Eb,Ib
+ 2: NOT Eb
+ 3: NEG Eb
+ 4: MUL AL,Eb
+-- 
+2.14.2
+
diff --git a/patches/kernel/0138-x86-insn-eval-Add-utility-functions-to-get-segment-s.patch b/patches/kernel/0138-x86-insn-eval-Add-utility-functions-to-get-segment-s.patch

deleted file mode 100644 (file)

index 87e821e..0000000
--- a/patches/kernel/0138-x86-insn-eval-Add-utility-functions-to-get-segment-s.patch
+++ /dev/null
@@ -1,119 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Ingo Molnar <mingo@kernel.org>
-Date: Sat, 23 Dec 2017 13:14:25 +0100
-Subject: [PATCH] x86/insn-eval: Add utility functions to get segment selector
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-When computing a linear address and segmentation is used, we need to know
-the base address of the segment involved in the computation. In most of
-the cases, the segment base address will be zero as in USER_DS/USER32_DS.
-However, it may be possible that a user space program defines its own
-segments via a local descriptor table. In such a case, the segment base
-address may not be zero. Thus, the segment base address is needed to
-calculate correctly the linear address.
-
-If running in protected mode, the segment selector to be used when
-computing a linear address is determined by either any of segment override
-prefixes in the instruction or inferred from the registers involved in the
-computation of the effective address; in that order. Also, there are cases
-when the segment override prefixes shall be ignored (i.e., code segments
-are always selected by the CS segment register; string instructions always
-use the ES segment register when using rDI register as operand). In long
-mode, segment registers are ignored, except for FS and GS. In these two
-cases, base addresses are obtained from the respective MSRs.
-
-For clarity, this process can be split into four steps (and an equal
-number of functions): determine if segment prefixes overrides can be used;
-parse the segment override prefixes, and use them if found; if not found
-or cannot be used, use the default segment registers associated with the
-operand registers. Once the segment register to use has been identified,
-read its value to obtain the segment selector.
-
-The method to obtain the segment selector depends on several factors. In
-32-bit builds, segment selectors are saved into a pt_regs structure
-when switching to kernel mode. The same is also true for virtual-8086
-mode. In 64-bit builds, segmentation is mostly ignored, except when
-running a program in 32-bit legacy mode. In this case, CS and SS can be
-obtained from pt_regs. DS, ES, FS and GS can be read directly from
-the respective segment registers.
-
-In order to identify the segment registers, a new set of #defines is
-introduced. It also includes two special identifiers. One of them
-indicates when the default segment register associated with instruction
-operands shall be used. Another one indicates that the contents of the
-segment register shall be ignored; this identifier is used when in long
-mode.
-
-Improvements-by: Borislav Petkov <bp@suse.de>
-Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: "Michael S. Tsirkin" <mst@redhat.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: ricardo.neri@intel.com
-Cc: Adrian Hunter <adrian.hunter@intel.com>
-Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
-Cc: Huang Rui <ray.huang@amd.com>
-Cc: Qiaowei Ren <qiaowei.ren@intel.com>
-Cc: Shuah Khan <shuah@kernel.org>
-Cc: Kees Cook <keescook@chromium.org>
-Cc: Jonathan Corbet <corbet@lwn.net>
-Cc: Jiri Slaby <jslaby@suse.cz>
-Cc: Dmitry Vyukov <dvyukov@google.com>
-Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com>
-Cc: Chris Metcalf <cmetcalf@mellanox.com>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Colin Ian King <colin.king@canonical.com>
-Cc: Chen Yucong <slaoub@gmail.com>
-Cc: Adam Buchbinder <adam.buchbinder@gmail.com>
-Cc: Vlastimil Babka <vbabka@suse.cz>
-Cc: Lorenzo Stoakes <lstoakes@gmail.com>
-Cc: Masami Hiramatsu <mhiramat@kernel.org>
-Cc: Paolo Bonzini <pbonzini@redhat.com>
-Cc: Andrew Morton <akpm@linux-foundation.org>
-Cc: Thomas Garnier <thgarnie@google.com>
-Link: https://lkml.kernel.org/r/1509135945-13762-14-git-send-email-ricardo.neri-calderon@linux.intel.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-
-(Partially cherry picked from commit 32d0b95300db03c2b23b2ea2c94769a4a138e79d)
-
-(cherry picked from commit ca2c18cb10c8beb56dfe21321abdddc724cec4de)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit abd7780592a3687eacc0a295d4d2959bb11ff75f)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/inat.h | 10 ++++++++++
- 1 file changed, 10 insertions(+)
-
-diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
-index 02aff0867211..1c78580e58be 100644
---- a/arch/x86/include/asm/inat.h
-+++ b/arch/x86/include/asm/inat.h
-@@ -97,6 +97,16 @@
- #define INAT_MAKE_GROUP(grp)  ((grp << INAT_GRP_OFFS) | INAT_MODRM)
- #define INAT_MAKE_IMM(imm)    (imm << INAT_IMM_OFFS)
- 
-+/* Identifiers for segment registers */
-+#define INAT_SEG_REG_IGNORE   0
-+#define INAT_SEG_REG_DEFAULT  1
-+#define INAT_SEG_REG_CS               2
-+#define INAT_SEG_REG_SS               3
-+#define INAT_SEG_REG_DS               4
-+#define INAT_SEG_REG_ES               5
-+#define INAT_SEG_REG_FS               6
-+#define INAT_SEG_REG_GS               7
-+
- /* Attribute search APIs */
- extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
- extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
--- 
-2.14.2
-
diff --git a/patches/kernel/0139-x86-entry-64-paravirt-Use-paravirt-safe-macro-to-acc.patch b/patches/kernel/0139-x86-entry-64-paravirt-Use-paravirt-safe-macro-to-acc.patch

deleted file mode 100644 (file)

index 07d50ac..0000000
--- a/patches/kernel/0139-x86-entry-64-paravirt-Use-paravirt-safe-macro-to-acc.patch
+++ /dev/null
@@ -1,129 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Date: Mon, 4 Dec 2017 15:07:07 +0100
-Subject: [PATCH] x86/entry/64/paravirt: Use paravirt-safe macro to access
- eflags
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Commit 1d3e53e8624a ("x86/entry/64: Refactor IRQ stacks and make them
-NMI-safe") added DEBUG_ENTRY_ASSERT_IRQS_OFF macro that acceses eflags
-using 'pushfq' instruction when testing for IF bit. On PV Xen guests
-looking at IF flag directly will always see it set, resulting in 'ud2'.
-
-Introduce SAVE_FLAGS() macro that will use appropriate save_fl pv op when
-running paravirt.
-
-Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Juergen Gross <jgross@suse.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: xen-devel@lists.xenproject.org
-Link: https://lkml.kernel.org/r/20171204150604.899457242@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit e17f8234538d1ff708673f287a42457c4dee720d)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 9f4a274842938ce8d55565ced4f45e7ad4a5da90)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/irqflags.h  | 3 +++
- arch/x86/include/asm/paravirt.h  | 9 +++++++++
- arch/x86/kernel/asm-offsets_64.c | 3 +++
- arch/x86/entry/entry_64.S        | 7 ++++---
- 4 files changed, 19 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
-index ac7692dcfa2e..d937781e1047 100644
---- a/arch/x86/include/asm/irqflags.h
-+++ b/arch/x86/include/asm/irqflags.h
-@@ -141,6 +141,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
-       swapgs;                                 \
-       sysretl
- 
-+#ifdef CONFIG_DEBUG_ENTRY
-+#define SAVE_FLAGS(x)         pushfq; popq %rax
-+#endif
- #else
- #define INTERRUPT_RETURN              iret
- #define ENABLE_INTERRUPTS_SYSEXIT     sti; sysexit
-diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
-index 43d4f90edebc..52dcd2361a78 100644
---- a/arch/x86/include/asm/paravirt.h
-+++ b/arch/x86/include/asm/paravirt.h
-@@ -926,6 +926,15 @@ extern void default_banner(void);
-       PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
-                 CLBR_NONE,                                            \
-                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
-+
-+#ifdef CONFIG_DEBUG_ENTRY
-+#define SAVE_FLAGS(clobbers)                                        \
-+      PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
-+                PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
-+                call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
-+                PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-+#endif
-+
- #endif        /* CONFIG_X86_32 */
- 
- #endif /* __ASSEMBLY__ */
-diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
-index cf42206926af..c21a5315b38e 100644
---- a/arch/x86/kernel/asm-offsets_64.c
-+++ b/arch/x86/kernel/asm-offsets_64.c
-@@ -22,6 +22,9 @@ int main(void)
- #ifdef CONFIG_PARAVIRT
-       OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
-       OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
-+#ifdef CONFIG_DEBUG_ENTRY
-+      OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
-+#endif
-       BLANK();
- #endif
- 
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 2491b3b25b9a..6c73e96daf78 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -461,12 +461,13 @@ END(irq_entries_start)
- 
- .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
- #ifdef CONFIG_DEBUG_ENTRY
--      pushfq
--      testl $X86_EFLAGS_IF, (%rsp)
-+      pushq %rax
-+      SAVE_FLAGS(CLBR_RAX)
-+      testl $X86_EFLAGS_IF, %eax
-       jz .Lokay_\@
-       ud2
- .Lokay_\@:
--      addq $8, %rsp
-+      popq %rax
- #endif
- .endm
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0139-x86-insn-eval-Add-utility-functions-to-get-segment-s.patch b/patches/kernel/0139-x86-insn-eval-Add-utility-functions-to-get-segment-s.patch

new file mode 100644 (file)

index 0000000..87e821e
--- /dev/null
+++ b/patches/kernel/0139-x86-insn-eval-Add-utility-functions-to-get-segment-s.patch
@@ -0,0 +1,119 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Ingo Molnar <mingo@kernel.org>
+Date: Sat, 23 Dec 2017 13:14:25 +0100
+Subject: [PATCH] x86/insn-eval: Add utility functions to get segment selector
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+When computing a linear address and segmentation is used, we need to know
+the base address of the segment involved in the computation. In most of
+the cases, the segment base address will be zero as in USER_DS/USER32_DS.
+However, it may be possible that a user space program defines its own
+segments via a local descriptor table. In such a case, the segment base
+address may not be zero. Thus, the segment base address is needed to
+calculate correctly the linear address.
+
+If running in protected mode, the segment selector to be used when
+computing a linear address is determined by either any of segment override
+prefixes in the instruction or inferred from the registers involved in the
+computation of the effective address; in that order. Also, there are cases
+when the segment override prefixes shall be ignored (i.e., code segments
+are always selected by the CS segment register; string instructions always
+use the ES segment register when using rDI register as operand). In long
+mode, segment registers are ignored, except for FS and GS. In these two
+cases, base addresses are obtained from the respective MSRs.
+
+For clarity, this process can be split into four steps (and an equal
+number of functions): determine if segment override prefixes can be used;
+parse the segment override prefixes, and use them if found; if not found
+or cannot be used, use the default segment registers associated with the
+operand registers. Once the segment register to use has been identified,
+read its value to obtain the segment selector.
+
+The method to obtain the segment selector depends on several factors. In
+32-bit builds, segment selectors are saved into a pt_regs structure
+when switching to kernel mode. The same is also true for virtual-8086
+mode. In 64-bit builds, segmentation is mostly ignored, except when
+running a program in 32-bit legacy mode. In this case, CS and SS can be
+obtained from pt_regs. DS, ES, FS and GS can be read directly from
+the respective segment registers.
+
+In order to identify the segment registers, a new set of #defines is
+introduced. It also includes two special identifiers. One of them
+indicates when the default segment register associated with instruction
+operands shall be used. Another one indicates that the contents of the
+segment register shall be ignored; this identifier is used when in long
+mode.
+
+Improvements-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: "Michael S. Tsirkin" <mst@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: ricardo.neri@intel.com
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
+Cc: Huang Rui <ray.huang@amd.com>
+Cc: Qiaowei Ren <qiaowei.ren@intel.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Jiri Slaby <jslaby@suse.cz>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com>
+Cc: Chris Metcalf <cmetcalf@mellanox.com>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Colin Ian King <colin.king@canonical.com>
+Cc: Chen Yucong <slaoub@gmail.com>
+Cc: Adam Buchbinder <adam.buchbinder@gmail.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Lorenzo Stoakes <lstoakes@gmail.com>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Thomas Garnier <thgarnie@google.com>
+Link: https://lkml.kernel.org/r/1509135945-13762-14-git-send-email-ricardo.neri-calderon@linux.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+(Partially cherry picked from commit 32d0b95300db03c2b23b2ea2c94769a4a138e79d)
+
+(cherry picked from commit ca2c18cb10c8beb56dfe21321abdddc724cec4de)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit abd7780592a3687eacc0a295d4d2959bb11ff75f)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/inat.h | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
+index 02aff0867211..1c78580e58be 100644
+--- a/arch/x86/include/asm/inat.h
++++ b/arch/x86/include/asm/inat.h
+@@ -97,6 +97,16 @@
+ #define INAT_MAKE_GROUP(grp)  ((grp << INAT_GRP_OFFS) | INAT_MODRM)
+ #define INAT_MAKE_IMM(imm)    (imm << INAT_IMM_OFFS)
+ 
++/* Identifiers for segment registers */
++#define INAT_SEG_REG_IGNORE   0
++#define INAT_SEG_REG_DEFAULT  1
++#define INAT_SEG_REG_CS               2
++#define INAT_SEG_REG_SS               3
++#define INAT_SEG_REG_DS               4
++#define INAT_SEG_REG_ES               5
++#define INAT_SEG_REG_FS               6
++#define INAT_SEG_REG_GS               7
++
+ /* Attribute search APIs */
+ extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+ extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0140-x86-entry-64-paravirt-Use-paravirt-safe-macro-to-acc.patch b/patches/kernel/0140-x86-entry-64-paravirt-Use-paravirt-safe-macro-to-acc.patch

new file mode 100644 (file)

index 0000000..07d50ac
--- /dev/null
+++ b/patches/kernel/0140-x86-entry-64-paravirt-Use-paravirt-safe-macro-to-acc.patch
@@ -0,0 +1,129 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Date: Mon, 4 Dec 2017 15:07:07 +0100
+Subject: [PATCH] x86/entry/64/paravirt: Use paravirt-safe macro to access
+ eflags
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Commit 1d3e53e8624a ("x86/entry/64: Refactor IRQ stacks and make them
+NMI-safe") added DEBUG_ENTRY_ASSERT_IRQS_OFF macro that accesses eflags
+using 'pushfq' instruction when testing for IF bit. On PV Xen guests
+looking at IF flag directly will always see it set, resulting in 'ud2'.
+
+Introduce SAVE_FLAGS() macro that will use appropriate save_fl pv op when
+running paravirt.
+
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: xen-devel@lists.xenproject.org
+Link: https://lkml.kernel.org/r/20171204150604.899457242@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit e17f8234538d1ff708673f287a42457c4dee720d)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 9f4a274842938ce8d55565ced4f45e7ad4a5da90)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/irqflags.h  | 3 +++
+ arch/x86/include/asm/paravirt.h  | 9 +++++++++
+ arch/x86/kernel/asm-offsets_64.c | 3 +++
+ arch/x86/entry/entry_64.S        | 7 ++++---
+ 4 files changed, 19 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
+index ac7692dcfa2e..d937781e1047 100644
+--- a/arch/x86/include/asm/irqflags.h
++++ b/arch/x86/include/asm/irqflags.h
+@@ -141,6 +141,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
+       swapgs;                                 \
+       sysretl
+ 
++#ifdef CONFIG_DEBUG_ENTRY
++#define SAVE_FLAGS(x)         pushfq; popq %rax
++#endif
+ #else
+ #define INTERRUPT_RETURN              iret
+ #define ENABLE_INTERRUPTS_SYSEXIT     sti; sysexit
+diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
+index 43d4f90edebc..52dcd2361a78 100644
+--- a/arch/x86/include/asm/paravirt.h
++++ b/arch/x86/include/asm/paravirt.h
+@@ -926,6 +926,15 @@ extern void default_banner(void);
+       PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
+                 CLBR_NONE,                                            \
+                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
++
++#ifdef CONFIG_DEBUG_ENTRY
++#define SAVE_FLAGS(clobbers)                                        \
++      PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
++                PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
++                call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
++                PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
++#endif
++
+ #endif        /* CONFIG_X86_32 */
+ 
+ #endif /* __ASSEMBLY__ */
+diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
+index cf42206926af..c21a5315b38e 100644
+--- a/arch/x86/kernel/asm-offsets_64.c
++++ b/arch/x86/kernel/asm-offsets_64.c
+@@ -22,6 +22,9 @@ int main(void)
+ #ifdef CONFIG_PARAVIRT
+       OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
+       OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
++#ifdef CONFIG_DEBUG_ENTRY
++      OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
++#endif
+       BLANK();
+ #endif
+ 
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 2491b3b25b9a..6c73e96daf78 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -461,12 +461,13 @@ END(irq_entries_start)
+ 
+ .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
+ #ifdef CONFIG_DEBUG_ENTRY
+-      pushfq
+-      testl $X86_EFLAGS_IF, (%rsp)
++      pushq %rax
++      SAVE_FLAGS(CLBR_RAX)
++      testl $X86_EFLAGS_IF, %eax
+       jz .Lokay_\@
+       ud2
+ .Lokay_\@:
+-      addq $8, %rsp
++      popq %rax
+ #endif
+ .endm
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0140-x86-unwinder-orc-Dont-bail-on-stack-overflow.patch b/patches/kernel/0140-x86-unwinder-orc-Dont-bail-on-stack-overflow.patch

deleted file mode 100644 (file)

index b7267e8..0000000
--- a/patches/kernel/0140-x86-unwinder-orc-Dont-bail-on-stack-overflow.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:08 +0100
-Subject: [PATCH] x86/unwinder/orc: Dont bail on stack overflow
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-If the stack overflows into a guard page and the ORC unwinder should work
-well: by construction, there can't be any meaningful data in the guard page
-because no writes to the guard page will have succeeded.
-
-But there is a bug that prevents unwinding from working correctly: if the
-starting register state has RSP pointing into a stack guard page, the ORC
-unwinder bails out immediately.
-
-Instead of bailing out immediately check whether the next page up is a
-valid check page and if so analyze that. As a result the ORC unwinder will
-start the unwind.
-
-Tested by intentionally overflowing the task stack.  The result is an
-accurate call trace instead of a trace consisting purely of '?' entries.
-
-There are a few other bugs that are triggered if the unwinder encounters a
-stack overflow after the first step, but they are outside the scope of this
-fix.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150604.991389777@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit d3a09104018cf2ad5973dfa8a9c138ef9f5015a3)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit e5c3115ac69cddd384d6f7abc4a0ef030b247498)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/unwind_orc.c | 14 ++++++++++++--
- 1 file changed, 12 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
-index 570b70d3f604..cea85bfe93f7 100644
---- a/arch/x86/kernel/unwind_orc.c
-+++ b/arch/x86/kernel/unwind_orc.c
-@@ -552,8 +552,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
-       }
- 
-       if (get_stack_info((unsigned long *)state->sp, state->task,
--                         &state->stack_info, &state->stack_mask))
--              return;
-+                         &state->stack_info, &state->stack_mask)) {
-+              /*
-+               * We weren't on a valid stack.  It's possible that
-+               * we overflowed a valid stack into a guard page.
-+               * See if the next page up is valid so that we can
-+               * generate some kind of backtrace if this happens.
-+               */
-+              void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
-+              if (get_stack_info(next_page, state->task, &state->stack_info,
-+                                 &state->stack_mask))
-+                      return;
-+      }
- 
-       /*
-        * The caller can provide the address of the first frame directly
--- 
-2.14.2
-
diff --git a/patches/kernel/0141-x86-unwinder-Handle-stack-overflows-more-gracefully.patch b/patches/kernel/0141-x86-unwinder-Handle-stack-overflows-more-gracefully.patch

deleted file mode 100644 (file)

index 90e76d7..0000000
--- a/patches/kernel/0141-x86-unwinder-Handle-stack-overflows-more-gracefully.patch
+++ /dev/null
@@ -1,336 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Mon, 4 Dec 2017 15:07:09 +0100
-Subject: [PATCH] x86/unwinder: Handle stack overflows more gracefully
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-There are at least two unwinder bugs hindering the debugging of
-stack-overflow crashes:
-
-- It doesn't deal gracefully with the case where the stack overflows and
-  the stack pointer itself isn't on a valid stack but the
-  to-be-dereferenced data *is*.
-
-- The ORC oops dump code doesn't know how to print partial pt_regs, for the
-  case where if we get an interrupt/exception in *early* entry code
-  before the full pt_regs have been saved.
-
-Fix both issues.
-
-http://lkml.kernel.org/r/20171126024031.uxi4numpbjm5rlbr@treble
-
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bpetkov@suse.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150605.071425003@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit b02fcf9ba1211097754b286043cd87a8b4907e75)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 9e51f396b068c3e8495cd130113e2f73b2b1f6b0)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/kdebug.h |  1 +
- arch/x86/include/asm/unwind.h |  7 ++++
- arch/x86/kernel/dumpstack.c   | 29 ++++++++++++++--
- arch/x86/kernel/process_64.c  | 12 +++----
- arch/x86/kernel/unwind_orc.c  | 78 +++++++++++++++----------------------------
- 5 files changed, 67 insertions(+), 60 deletions(-)
-
-diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
-index 29a594a3b82a..2a7769dd8fa2 100644
---- a/arch/x86/include/asm/kdebug.h
-+++ b/arch/x86/include/asm/kdebug.h
-@@ -25,6 +25,7 @@ extern void die(const char *, struct pt_regs *,long);
- extern int __must_check __die(const char *, struct pt_regs *, long);
- extern void show_stack_regs(struct pt_regs *regs);
- extern void __show_regs(struct pt_regs *regs, int all);
-+extern void show_iret_regs(struct pt_regs *regs);
- extern unsigned long oops_begin(void);
- extern void oops_end(unsigned long, struct pt_regs *, int signr);
- 
-diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
-index 35d67dc7b69f..38fa6154e382 100644
---- a/arch/x86/include/asm/unwind.h
-+++ b/arch/x86/include/asm/unwind.h
-@@ -6,6 +6,9 @@
- #include <asm/ptrace.h>
- #include <asm/stacktrace.h>
- 
-+#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
-+#define IRET_FRAME_SIZE   (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
-+
- struct unwind_state {
-       struct stack_info stack_info;
-       unsigned long stack_mask;
-@@ -51,6 +54,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
- }
- 
- #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
-+/*
-+ * WARNING: The entire pt_regs may not be safe to dereference.  In some cases,
-+ * only the iret frame registers are accessible.  Use with caution!
-+ */
- static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
- {
-       if (unwind_done(state))
-diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
-index dbce3cca94cb..695cdce5dfc8 100644
---- a/arch/x86/kernel/dumpstack.c
-+++ b/arch/x86/kernel/dumpstack.c
-@@ -50,6 +50,28 @@ static void printk_stack_address(unsigned long address, int reliable,
-       printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
- }
- 
-+void show_iret_regs(struct pt_regs *regs)
-+{
-+      printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
-+      printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
-+              regs->sp, regs->flags);
-+}
-+
-+static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
-+{
-+      if (on_stack(info, regs, sizeof(*regs)))
-+              __show_regs(regs, 0);
-+      else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
-+                        IRET_FRAME_SIZE)) {
-+              /*
-+               * When an interrupt or exception occurs in entry code, the
-+               * full pt_regs might not have been saved yet.  In that case
-+               * just print the iret frame.
-+               */
-+              show_iret_regs(regs);
-+      }
-+}
-+
- void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                       unsigned long *stack, char *log_lvl)
- {
-@@ -94,6 +116,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-               if (stack_name)
-                       printk("%s <%s>\n", log_lvl, stack_name);
- 
-+              if (regs)
-+                      show_regs_safe(&stack_info, regs);
-+
-               /*
-                * Scan the stack, printing any text addresses we find.  At the
-                * same time, follow proper stack frames with the unwinder.
-@@ -116,7 +141,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- 
-                       /*
-                        * Don't print regs->ip again if it was already printed
--                       * by __show_regs() below.
-+                       * by show_regs_safe() below.
-                        */
-                       if (regs && stack == &regs->ip) {
-                               unwind_next_frame(&state);
-@@ -154,7 +179,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                       /* if the frame has entry regs, print them */
-                       regs = unwind_get_entry_regs(&state);
-                       if (regs)
--                              __show_regs(regs, 0);
-+                              show_regs_safe(&stack_info, regs);
-               }
- 
-               if (stack_name)
-diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
-index b08b9b6c40eb..01b119bebb68 100644
---- a/arch/x86/kernel/process_64.c
-+++ b/arch/x86/kernel/process_64.c
-@@ -69,10 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
-       unsigned int fsindex, gsindex;
-       unsigned int ds, cs, es;
- 
--      printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff,
--              (void *)regs->ip);
--      printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
--              regs->sp, regs->flags);
-+      show_iret_regs(regs);
-+
-       if (regs->orig_ax != -1)
-               pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
-       else
-@@ -89,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
-       printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
-              regs->r13, regs->r14, regs->r15);
- 
-+      if (!all)
-+              return;
-+
-       asm("movl %%ds,%0" : "=r" (ds));
-       asm("movl %%cs,%0" : "=r" (cs));
-       asm("movl %%es,%0" : "=r" (es));
-@@ -99,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
-       rdmsrl(MSR_GS_BASE, gs);
-       rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
- 
--      if (!all)
--              return;
--
-       cr0 = read_cr0();
-       cr2 = read_cr2();
-       cr3 = __read_cr3();
-diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
-index cea85bfe93f7..702f15f6b5be 100644
---- a/arch/x86/kernel/unwind_orc.c
-+++ b/arch/x86/kernel/unwind_orc.c
-@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
-       return NULL;
- }
- 
--static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
-+static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
-                           size_t len)
- {
-       struct stack_info *info = &state->stack_info;
-+      void *addr = (void *)_addr;
- 
--      /*
--       * If the address isn't on the current stack, switch to the next one.
--       *
--       * We may have to traverse multiple stacks to deal with the possibility
--       * that info->next_sp could point to an empty stack and the address
--       * could be on a subsequent stack.
--       */
--      while (!on_stack(info, (void *)addr, len))
--              if (get_stack_info(info->next_sp, state->task, info,
--                                 &state->stack_mask))
--                      return false;
-+      if (!on_stack(info, addr, len) &&
-+          (get_stack_info(addr, state->task, info, &state->stack_mask)))
-+              return false;
- 
-       return true;
- }
-@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
-       return true;
- }
- 
--#define REGS_SIZE (sizeof(struct pt_regs))
--#define SP_OFFSET (offsetof(struct pt_regs, sp))
--#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
--#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
--
- static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
--                           unsigned long *ip, unsigned long *sp, bool full)
-+                           unsigned long *ip, unsigned long *sp)
- {
--      size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
--      size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
--      struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
--
--      if (IS_ENABLED(CONFIG_X86_64)) {
--              if (!stack_access_ok(state, addr, regs_size))
--                      return false;
--
--              *ip = regs->ip;
--              *sp = regs->sp;
-+      struct pt_regs *regs = (struct pt_regs *)addr;
- 
--              return true;
--      }
-+      /* x86-32 support will be more complicated due to the &regs->sp hack */
-+      BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
- 
--      if (!stack_access_ok(state, addr, sp_offset))
-+      if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
-               return false;
- 
-       *ip = regs->ip;
-+      *sp = regs->sp;
-+      return true;
-+}
- 
--      if (user_mode(regs)) {
--              if (!stack_access_ok(state, addr + sp_offset,
--                                   REGS_SIZE - SP_OFFSET))
--                      return false;
-+static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
-+                                unsigned long *ip, unsigned long *sp)
-+{
-+      struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
- 
--              *sp = regs->sp;
--      } else
--              *sp = (unsigned long)&regs->sp;
-+      if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
-+              return false;
- 
-+      *ip = regs->ip;
-+      *sp = regs->sp;
-       return true;
- }
- 
-@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
-       unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
-       enum stack_type prev_type = state->stack_info.type;
-       struct orc_entry *orc;
--      struct pt_regs *ptregs;
-       bool indirect = false;
- 
-       if (unwind_done(state))
-@@ -435,8 +417,8 @@ bool unwind_next_frame(struct unwind_state *state)
-               break;
- 
-       case ORC_TYPE_REGS:
--              if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
--                      orc_warn("can't dereference registers at %p for ip %p\n",
-+              if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
-+                      orc_warn("can't dereference registers at %p for ip %pB\n",
-                                (void *)sp, (void *)orig_ip);
-                       goto done;
-               }
-@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
-               break;
- 
-       case ORC_TYPE_REGS_IRET:
--              if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
--                      orc_warn("can't dereference iret registers at %p for ip %p\n",
-+              if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
-+                      orc_warn("can't dereference iret registers at %p for ip %pB\n",
-                                (void *)sp, (void *)orig_ip);
-                       goto done;
-               }
- 
--              ptregs = container_of((void *)sp, struct pt_regs, ip);
--              if ((unsigned long)ptregs >= prev_sp &&
--                  on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
--                      state->regs = ptregs;
--                      state->full_regs = false;
--              } else
--                      state->regs = NULL;
--
-+              state->regs = (void *)sp - IRET_FRAME_OFFSET;
-+              state->full_regs = false;
-               state->signal = true;
-               break;
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0141-x86-unwinder-orc-Dont-bail-on-stack-overflow.patch b/patches/kernel/0141-x86-unwinder-orc-Dont-bail-on-stack-overflow.patch

new file mode 100644 (file)

index 0000000..b7267e8
--- /dev/null
+++ b/patches/kernel/0141-x86-unwinder-orc-Dont-bail-on-stack-overflow.patch
@@ -0,0 +1,91 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:08 +0100
+Subject: [PATCH] x86/unwinder/orc: Dont bail on stack overflow
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+If the stack overflows into a guard page, the ORC unwinder should work
+well: by construction, there can't be any meaningful data in the guard page
+because no writes to the guard page will have succeeded.
+
+But there is a bug that prevents unwinding from working correctly: if the
+starting register state has RSP pointing into a stack guard page, the ORC
+unwinder bails out immediately.
+
+Instead of bailing out immediately check whether the next page up is a
+valid stack page and if so analyze that. As a result the ORC unwinder will
+start the unwind.
+
+Tested by intentionally overflowing the task stack.  The result is an
+accurate call trace instead of a trace consisting purely of '?' entries.
+
+There are a few other bugs that are triggered if the unwinder encounters a
+stack overflow after the first step, but they are outside the scope of this
+fix.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150604.991389777@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit d3a09104018cf2ad5973dfa8a9c138ef9f5015a3)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit e5c3115ac69cddd384d6f7abc4a0ef030b247498)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/unwind_orc.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
+index 570b70d3f604..cea85bfe93f7 100644
+--- a/arch/x86/kernel/unwind_orc.c
++++ b/arch/x86/kernel/unwind_orc.c
+@@ -552,8 +552,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
+       }
+ 
+       if (get_stack_info((unsigned long *)state->sp, state->task,
+-                         &state->stack_info, &state->stack_mask))
+-              return;
++                         &state->stack_info, &state->stack_mask)) {
++              /*
++               * We weren't on a valid stack.  It's possible that
++               * we overflowed a valid stack into a guard page.
++               * See if the next page up is valid so that we can
++               * generate some kind of backtrace if this happens.
++               */
++              void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
++              if (get_stack_info(next_page, state->task, &state->stack_info,
++                                 &state->stack_mask))
++                      return;
++      }
+ 
+       /*
+        * The caller can provide the address of the first frame directly
+-- 
+2.14.2
+
diff --git a/patches/kernel/0142-x86-irq-Remove-an-old-outdated-comment-about-context.patch b/patches/kernel/0142-x86-irq-Remove-an-old-outdated-comment-about-context.patch

deleted file mode 100644 (file)

index 5e9cc97..0000000
--- a/patches/kernel/0142-x86-irq-Remove-an-old-outdated-comment-about-context.patch
+++ /dev/null
@@ -1,75 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:10 +0100
-Subject: [PATCH] x86/irq: Remove an old outdated comment about context
- tracking races
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-That race has been fixed and code cleaned up for a while now.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150605.150551639@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 6669a692605547892a026445e460bf233958bd7f)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 7344db7580965d6f9994b6d7b1a74206d7635565)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/irq.c | 12 ------------
- 1 file changed, 12 deletions(-)
-
-diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
-index 4ed0aba8dbc8..a84142a910f3 100644
---- a/arch/x86/kernel/irq.c
-+++ b/arch/x86/kernel/irq.c
-@@ -222,18 +222,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
-       /* high bit used in ret_from_ code  */
-       unsigned vector = ~regs->orig_ax;
- 
--      /*
--       * NB: Unlike exception entries, IRQ entries do not reliably
--       * handle context tracking in the low-level entry code.  This is
--       * because syscall entries execute briefly with IRQs on before
--       * updating context tracking state, so we can take an IRQ from
--       * kernel mode with CONTEXT_USER.  The low-level entry code only
--       * updates the context if we came from user mode, so we won't
--       * switch to CONTEXT_KERNEL.  We'll fix that once the syscall
--       * code is cleaned up enough that we can cleanly defer enabling
--       * IRQs.
--       */
--
-       entering_irq();
- 
-       /* entering_irq() tells RCU that we're not quiescent.  Check it. */
--- 
-2.14.2
-
diff --git a/patches/kernel/0142-x86-unwinder-Handle-stack-overflows-more-gracefully.patch b/patches/kernel/0142-x86-unwinder-Handle-stack-overflows-more-gracefully.patch

new file mode 100644 (file)

index 0000000..90e76d7
--- /dev/null
+++ b/patches/kernel/0142-x86-unwinder-Handle-stack-overflows-more-gracefully.patch
@@ -0,0 +1,336 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 4 Dec 2017 15:07:09 +0100
+Subject: [PATCH] x86/unwinder: Handle stack overflows more gracefully
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+There are at least two unwinder bugs hindering the debugging of
+stack-overflow crashes:
+
+- It doesn't deal gracefully with the case where the stack overflows and
+  the stack pointer itself isn't on a valid stack but the
+  to-be-dereferenced data *is*.
+
+- The ORC oops dump code doesn't know how to print partial pt_regs, for the
+  case where we get an interrupt/exception in *early* entry code
+  before the full pt_regs have been saved.
+
+Fix both issues.
+
+http://lkml.kernel.org/r/20171126024031.uxi4numpbjm5rlbr@treble
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bpetkov@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.071425003@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit b02fcf9ba1211097754b286043cd87a8b4907e75)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 9e51f396b068c3e8495cd130113e2f73b2b1f6b0)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/kdebug.h |  1 +
+ arch/x86/include/asm/unwind.h |  7 ++++
+ arch/x86/kernel/dumpstack.c   | 29 ++++++++++++++--
+ arch/x86/kernel/process_64.c  | 12 +++----
+ arch/x86/kernel/unwind_orc.c  | 78 +++++++++++++++----------------------------
+ 5 files changed, 67 insertions(+), 60 deletions(-)
+
+diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
+index 29a594a3b82a..2a7769dd8fa2 100644
+--- a/arch/x86/include/asm/kdebug.h
++++ b/arch/x86/include/asm/kdebug.h
+@@ -25,6 +25,7 @@ extern void die(const char *, struct pt_regs *,long);
+ extern int __must_check __die(const char *, struct pt_regs *, long);
+ extern void show_stack_regs(struct pt_regs *regs);
+ extern void __show_regs(struct pt_regs *regs, int all);
++extern void show_iret_regs(struct pt_regs *regs);
+ extern unsigned long oops_begin(void);
+ extern void oops_end(unsigned long, struct pt_regs *, int signr);
+ 
+diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
+index 35d67dc7b69f..38fa6154e382 100644
+--- a/arch/x86/include/asm/unwind.h
++++ b/arch/x86/include/asm/unwind.h
+@@ -6,6 +6,9 @@
+ #include <asm/ptrace.h>
+ #include <asm/stacktrace.h>
+ 
++#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
++#define IRET_FRAME_SIZE   (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
++
+ struct unwind_state {
+       struct stack_info stack_info;
+       unsigned long stack_mask;
+@@ -51,6 +54,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
+ }
+ 
+ #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
++/*
++ * WARNING: The entire pt_regs may not be safe to dereference.  In some cases,
++ * only the iret frame registers are accessible.  Use with caution!
++ */
+ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+ {
+       if (unwind_done(state))
+diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
+index dbce3cca94cb..695cdce5dfc8 100644
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -50,6 +50,28 @@ static void printk_stack_address(unsigned long address, int reliable,
+       printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
+ }
+ 
++void show_iret_regs(struct pt_regs *regs)
++{
++      printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
++      printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
++              regs->sp, regs->flags);
++}
++
++static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
++{
++      if (on_stack(info, regs, sizeof(*regs)))
++              __show_regs(regs, 0);
++      else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
++                        IRET_FRAME_SIZE)) {
++              /*
++               * When an interrupt or exception occurs in entry code, the
++               * full pt_regs might not have been saved yet.  In that case
++               * just print the iret frame.
++               */
++              show_iret_regs(regs);
++      }
++}
++
+ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+                       unsigned long *stack, char *log_lvl)
+ {
+@@ -94,6 +116,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+               if (stack_name)
+                       printk("%s <%s>\n", log_lvl, stack_name);
+ 
++              if (regs)
++                      show_regs_safe(&stack_info, regs);
++
+               /*
+                * Scan the stack, printing any text addresses we find.  At the
+                * same time, follow proper stack frames with the unwinder.
+@@ -116,7 +141,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ 
+                       /*
+                        * Don't print regs->ip again if it was already printed
+-                       * by __show_regs() below.
++                       * by show_regs_safe() below.
+                        */
+                       if (regs && stack == &regs->ip) {
+                               unwind_next_frame(&state);
+@@ -154,7 +179,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+                       /* if the frame has entry regs, print them */
+                       regs = unwind_get_entry_regs(&state);
+                       if (regs)
+-                              __show_regs(regs, 0);
++                              show_regs_safe(&stack_info, regs);
+               }
+ 
+               if (stack_name)
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index b08b9b6c40eb..01b119bebb68 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -69,10 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
+       unsigned int fsindex, gsindex;
+       unsigned int ds, cs, es;
+ 
+-      printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff,
+-              (void *)regs->ip);
+-      printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
+-              regs->sp, regs->flags);
++      show_iret_regs(regs);
++
+       if (regs->orig_ax != -1)
+               pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
+       else
+@@ -89,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
+       printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
+              regs->r13, regs->r14, regs->r15);
+ 
++      if (!all)
++              return;
++
+       asm("movl %%ds,%0" : "=r" (ds));
+       asm("movl %%cs,%0" : "=r" (cs));
+       asm("movl %%es,%0" : "=r" (es));
+@@ -99,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
+       rdmsrl(MSR_GS_BASE, gs);
+       rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
+ 
+-      if (!all)
+-              return;
+-
+       cr0 = read_cr0();
+       cr2 = read_cr2();
+       cr3 = __read_cr3();
+diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
+index cea85bfe93f7..702f15f6b5be 100644
+--- a/arch/x86/kernel/unwind_orc.c
++++ b/arch/x86/kernel/unwind_orc.c
+@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+       return NULL;
+ }
+ 
+-static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
++static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
+                           size_t len)
+ {
+       struct stack_info *info = &state->stack_info;
++      void *addr = (void *)_addr;
+ 
+-      /*
+-       * If the address isn't on the current stack, switch to the next one.
+-       *
+-       * We may have to traverse multiple stacks to deal with the possibility
+-       * that info->next_sp could point to an empty stack and the address
+-       * could be on a subsequent stack.
+-       */
+-      while (!on_stack(info, (void *)addr, len))
+-              if (get_stack_info(info->next_sp, state->task, info,
+-                                 &state->stack_mask))
+-                      return false;
++      if (!on_stack(info, addr, len) &&
++          (get_stack_info(addr, state->task, info, &state->stack_mask)))
++              return false;
+ 
+       return true;
+ }
+@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
+       return true;
+ }
+ 
+-#define REGS_SIZE (sizeof(struct pt_regs))
+-#define SP_OFFSET (offsetof(struct pt_regs, sp))
+-#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
+-#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
+-
+ static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
+-                           unsigned long *ip, unsigned long *sp, bool full)
++                           unsigned long *ip, unsigned long *sp)
+ {
+-      size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
+-      size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
+-      struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
+-
+-      if (IS_ENABLED(CONFIG_X86_64)) {
+-              if (!stack_access_ok(state, addr, regs_size))
+-                      return false;
+-
+-              *ip = regs->ip;
+-              *sp = regs->sp;
++      struct pt_regs *regs = (struct pt_regs *)addr;
+ 
+-              return true;
+-      }
++      /* x86-32 support will be more complicated due to the &regs->sp hack */
++      BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
+ 
+-      if (!stack_access_ok(state, addr, sp_offset))
++      if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
+               return false;
+ 
+       *ip = regs->ip;
++      *sp = regs->sp;
++      return true;
++}
+ 
+-      if (user_mode(regs)) {
+-              if (!stack_access_ok(state, addr + sp_offset,
+-                                   REGS_SIZE - SP_OFFSET))
+-                      return false;
++static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
++                                unsigned long *ip, unsigned long *sp)
++{
++      struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
+ 
+-              *sp = regs->sp;
+-      } else
+-              *sp = (unsigned long)&regs->sp;
++      if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
++              return false;
+ 
++      *ip = regs->ip;
++      *sp = regs->sp;
+       return true;
+ }
+ 
+@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
+       unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
+       enum stack_type prev_type = state->stack_info.type;
+       struct orc_entry *orc;
+-      struct pt_regs *ptregs;
+       bool indirect = false;
+ 
+       if (unwind_done(state))
+@@ -435,8 +417,8 @@ bool unwind_next_frame(struct unwind_state *state)
+               break;
+ 
+       case ORC_TYPE_REGS:
+-              if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
+-                      orc_warn("can't dereference registers at %p for ip %p\n",
++              if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
++                      orc_warn("can't dereference registers at %p for ip %pB\n",
+                                (void *)sp, (void *)orig_ip);
+                       goto done;
+               }
+@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
+               break;
+ 
+       case ORC_TYPE_REGS_IRET:
+-              if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
+-                      orc_warn("can't dereference iret registers at %p for ip %p\n",
++              if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
++                      orc_warn("can't dereference iret registers at %p for ip %pB\n",
+                                (void *)sp, (void *)orig_ip);
+                       goto done;
+               }
+ 
+-              ptregs = container_of((void *)sp, struct pt_regs, ip);
+-              if ((unsigned long)ptregs >= prev_sp &&
+-                  on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
+-                      state->regs = ptregs;
+-                      state->full_regs = false;
+-              } else
+-                      state->regs = NULL;
+-
++              state->regs = (void *)sp - IRET_FRAME_OFFSET;
++              state->full_regs = false;
+               state->signal = true;
+               break;
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0143-x86-irq-64-Print-the-offending-IP-in-the-stack-overf.patch b/patches/kernel/0143-x86-irq-64-Print-the-offending-IP-in-the-stack-overf.patch

deleted file mode 100644 (file)

index 1231f6f..0000000
--- a/patches/kernel/0143-x86-irq-64-Print-the-offending-IP-in-the-stack-overf.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:11 +0100
-Subject: [PATCH] x86/irq/64: Print the offending IP in the stack overflow
- warning
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-In case something goes wrong with unwind (not unlikely in case of
-overflow), print the offending IP where we detected the overflow.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150605.231677119@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 4f3789e792296e21405f708cf3cb409d7c7d5683)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit aa820446b0d31df0870b176257b40baadaf4444c)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/irq_64.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
-index 3be74fbdeff2..feca14980e32 100644
---- a/arch/x86/kernel/irq_64.c
-+++ b/arch/x86/kernel/irq_64.c
-@@ -56,10 +56,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
-       if (regs->sp >= estack_top && regs->sp <= estack_bottom)
-               return;
- 
--      WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
-+      WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
-               current->comm, curbase, regs->sp,
-               irq_stack_top, irq_stack_bottom,
--              estack_top, estack_bottom);
-+              estack_top, estack_bottom, (void *)regs->ip);
- 
-       if (sysctl_panic_on_stackoverflow)
-               panic("low stack detected by irq handler - check messages\n");
--- 
-2.14.2
-
diff --git a/patches/kernel/0143-x86-irq-Remove-an-old-outdated-comment-about-context.patch b/patches/kernel/0143-x86-irq-Remove-an-old-outdated-comment-about-context.patch

new file mode 100644 (file)

index 0000000..5e9cc97
--- /dev/null
+++ b/patches/kernel/0143-x86-irq-Remove-an-old-outdated-comment-about-context.patch
@@ -0,0 +1,75 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:10 +0100
+Subject: [PATCH] x86/irq: Remove an old outdated comment about context
+ tracking races
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+That race has been fixed and code cleaned up for a while now.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.150551639@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 6669a692605547892a026445e460bf233958bd7f)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 7344db7580965d6f9994b6d7b1a74206d7635565)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/irq.c | 12 ------------
+ 1 file changed, 12 deletions(-)
+
+diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
+index 4ed0aba8dbc8..a84142a910f3 100644
+--- a/arch/x86/kernel/irq.c
++++ b/arch/x86/kernel/irq.c
+@@ -222,18 +222,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
+       /* high bit used in ret_from_ code  */
+       unsigned vector = ~regs->orig_ax;
+ 
+-      /*
+-       * NB: Unlike exception entries, IRQ entries do not reliably
+-       * handle context tracking in the low-level entry code.  This is
+-       * because syscall entries execute briefly with IRQs on before
+-       * updating context tracking state, so we can take an IRQ from
+-       * kernel mode with CONTEXT_USER.  The low-level entry code only
+-       * updates the context if we came from user mode, so we won't
+-       * switch to CONTEXT_KERNEL.  We'll fix that once the syscall
+-       * code is cleaned up enough that we can cleanly defer enabling
+-       * IRQs.
+-       */
+-
+       entering_irq();
+ 
+       /* entering_irq() tells RCU that we're not quiescent.  Check it. */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0144-x86-entry-64-Allocate-and-enable-the-SYSENTER-stack.patch b/patches/kernel/0144-x86-entry-64-Allocate-and-enable-the-SYSENTER-stack.patch

deleted file mode 100644 (file)

index 8a0b80f..0000000
--- a/patches/kernel/0144-x86-entry-64-Allocate-and-enable-the-SYSENTER-stack.patch
+++ /dev/null
@@ -1,182 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:12 +0100
-Subject: [PATCH] x86/entry/64: Allocate and enable the SYSENTER stack
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This will simplify future changes that want scratch variables early in
-the SYSENTER handler -- they'll be able to spill registers to the
-stack.  It also lets us get rid of a SWAPGS_UNSAFE_STACK user.
-
-This does not depend on CONFIG_IA32_EMULATION=y because we'll want the
-stack space even without IA32 emulation.
-
-As far as I can tell, the reason that this wasn't done from day 1 is
-that we use IST for #DB and #BP, which is IMO rather nasty and causes
-a lot more problems than it solves.  But, since #DB uses IST, we don't
-actually need a real stack for SYSENTER (because SYSENTER with TF set
-will invoke #DB on the IST stack rather than the SYSENTER stack).
-
-I want to remove IST usage from these vectors some day, and this patch
-is a prerequisite for that as well.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150605.312726423@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 1a79797b58cddfa948420a7553241c79c013e3ca)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 8e621515fa8d1649b031f34b9d498dcd865db1c3)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/processor.h | 3 ---
- arch/x86/kernel/asm-offsets.c    | 5 +++++
- arch/x86/kernel/asm-offsets_32.c | 5 -----
- arch/x86/kernel/cpu/common.c     | 4 +++-
- arch/x86/kernel/process.c        | 2 --
- arch/x86/kernel/traps.c          | 3 +--
- arch/x86/entry/entry_64_compat.S | 2 +-
- 7 files changed, 10 insertions(+), 14 deletions(-)
-
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index 79739e5f939a..5225917f9760 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -333,14 +333,11 @@ struct tss_struct {
-        */
-       unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
- 
--#ifdef CONFIG_X86_32
-       /*
-        * Space for the temporary SYSENTER stack.
-        */
-       unsigned long           SYSENTER_stack_canary;
-       unsigned long           SYSENTER_stack[64];
--#endif
--
- } ____cacheline_aligned;
- 
- DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
-diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
-index de827d6ac8c2..031bd35bd911 100644
---- a/arch/x86/kernel/asm-offsets.c
-+++ b/arch/x86/kernel/asm-offsets.c
-@@ -92,4 +92,9 @@ void common(void) {
- 
-       BLANK();
-       DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
-+
-+      /* Offset from cpu_tss to SYSENTER_stack */
-+      OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
-+      /* Size of SYSENTER_stack */
-+      DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
- }
-diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
-index 880aa093268d..d09b161a3bd0 100644
---- a/arch/x86/kernel/asm-offsets_32.c
-+++ b/arch/x86/kernel/asm-offsets_32.c
-@@ -52,11 +52,6 @@ void foo(void)
-       DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
-              offsetofend(struct tss_struct, SYSENTER_stack));
- 
--      /* Offset from cpu_tss to SYSENTER_stack */
--      OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
--      /* Size of SYSENTER_stack */
--      DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
--
- #ifdef CONFIG_CC_STACKPROTECTOR
-       BLANK();
-       OFFSET(stack_canary_offset, stack_canary, canary);
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 121fe3570d6f..aa97e4cd3a33 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -1362,7 +1362,9 @@ void syscall_init(void)
-        * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
-        */
-       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
--      wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
-+      wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
-+                  (unsigned long)this_cpu_ptr(&cpu_tss) +
-+                  offsetofend(struct tss_struct, SYSENTER_stack));
-       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
- #else
-       wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
-diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
-index ccf3a4f4ef68..aa86e810fb54 100644
---- a/arch/x86/kernel/process.c
-+++ b/arch/x86/kernel/process.c
-@@ -70,9 +70,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
-         */
-       .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
- #endif
--#ifdef CONFIG_X86_32
-       .SYSENTER_stack_canary  = STACK_END_MAGIC,
--#endif
- };
- EXPORT_PER_CPU_SYMBOL(cpu_tss);
- 
-diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
-index 3a46cab2696e..7b1d0df624cf 100644
---- a/arch/x86/kernel/traps.c
-+++ b/arch/x86/kernel/traps.c
-@@ -806,14 +806,13 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
-       debug_stack_usage_dec();
- 
- exit:
--#if defined(CONFIG_X86_32)
-       /*
-        * This is the most likely code path that involves non-trivial use
-        * of the SYSENTER stack.  Check that we haven't overrun it.
-        */
-       WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
-            "Overran or corrupted SYSENTER stack\n");
--#endif
-+
-       ist_exit(regs);
- }
- NOKPROBE_SYMBOL(do_debug);
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
-index be745b7a3e3e..1f76b66518ee 100644
---- a/arch/x86/entry/entry_64_compat.S
-+++ b/arch/x86/entry/entry_64_compat.S
-@@ -47,7 +47,7 @@
-  */
- ENTRY(entry_SYSENTER_compat)
-       /* Interrupts are off on entry. */
--      SWAPGS_UNSAFE_STACK
-+      SWAPGS
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- 
-       /*
--- 
-2.14.2
-
diff --git a/patches/kernel/0144-x86-irq-64-Print-the-offending-IP-in-the-stack-overf.patch b/patches/kernel/0144-x86-irq-64-Print-the-offending-IP-in-the-stack-overf.patch

new file mode 100644 (file)

index 0000000..1231f6f
--- /dev/null
+++ b/patches/kernel/0144-x86-irq-64-Print-the-offending-IP-in-the-stack-overf.patch
@@ -0,0 +1,70 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:11 +0100
+Subject: [PATCH] x86/irq/64: Print the offending IP in the stack overflow
+ warning
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+In case something goes wrong with unwind (not unlikely in case of
+overflow), print the offending IP where we detected the overflow.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.231677119@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 4f3789e792296e21405f708cf3cb409d7c7d5683)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit aa820446b0d31df0870b176257b40baadaf4444c)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/irq_64.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
+index 3be74fbdeff2..feca14980e32 100644
+--- a/arch/x86/kernel/irq_64.c
++++ b/arch/x86/kernel/irq_64.c
+@@ -56,10 +56,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
+       if (regs->sp >= estack_top && regs->sp <= estack_bottom)
+               return;
+ 
+-      WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
++      WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
+               current->comm, curbase, regs->sp,
+               irq_stack_top, irq_stack_bottom,
+-              estack_top, estack_bottom);
++              estack_top, estack_bottom, (void *)regs->ip);
+ 
+       if (sysctl_panic_on_stackoverflow)
+               panic("low stack detected by irq handler - check messages\n");
+-- 
+2.14.2
+
diff --git a/patches/kernel/0145-x86-dumpstack-Add-get_stack_info-support-for-the-SYS.patch b/patches/kernel/0145-x86-dumpstack-Add-get_stack_info-support-for-the-SYS.patch

deleted file mode 100644 (file)

index fcf8851..0000000
--- a/patches/kernel/0145-x86-dumpstack-Add-get_stack_info-support-for-the-SYS.patch
+++ /dev/null
@@ -1,184 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:13 +0100
-Subject: [PATCH] x86/dumpstack: Add get_stack_info() support for the SYSENTER
- stack
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-get_stack_info() doesn't currently know about the SYSENTER stack, so
-unwinding will fail if we entered the kernel on the SYSENTER stack
-and haven't fully switched off.  Teach get_stack_info() about the
-SYSENTER stack.
-
-With future patches applied that run part of the entry code on the
-SYSENTER stack and introduce an intentional BUG(), I would get:
-
-  PANIC: double fault, error_code: 0x0
-  ...
-  RIP: 0010:do_error_trap+0x33/0x1c0
-  ...
-  Call Trace:
-  Code: ...
-
-With this patch, I get:
-
-  PANIC: double fault, error_code: 0x0
-  ...
-  Call Trace:
-   <SYSENTER>
-   ? async_page_fault+0x36/0x60
-   ? invalid_op+0x22/0x40
-   ? async_page_fault+0x36/0x60
-   ? sync_regs+0x3c/0x40
-   ? sync_regs+0x2e/0x40
-   ? error_entry+0x6c/0xd0
-   ? async_page_fault+0x36/0x60
-   </SYSENTER>
-  Code: ...
-
-which is a lot more informative.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150605.392711508@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 33a2f1a6c4d7c0a02d1c006fb0379cc5ca3b96bb)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 72e90cc5463cf882c5f9508817029d85b317f2b5)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/stacktrace.h |  3 +++
- arch/x86/kernel/dumpstack.c       | 19 +++++++++++++++++++
- arch/x86/kernel/dumpstack_32.c    |  6 ++++++
- arch/x86/kernel/dumpstack_64.c    |  6 ++++++
- 4 files changed, 34 insertions(+)
-
-diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
-index 2e41c50ddf47..95f999576131 100644
---- a/arch/x86/include/asm/stacktrace.h
-+++ b/arch/x86/include/asm/stacktrace.h
-@@ -15,6 +15,7 @@ enum stack_type {
-       STACK_TYPE_TASK,
-       STACK_TYPE_IRQ,
-       STACK_TYPE_SOFTIRQ,
-+      STACK_TYPE_SYSENTER,
-       STACK_TYPE_EXCEPTION,
-       STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
- };
-@@ -27,6 +28,8 @@ struct stack_info {
- bool in_task_stack(unsigned long *stack, struct task_struct *task,
-                  struct stack_info *info);
- 
-+bool in_sysenter_stack(unsigned long *stack, struct stack_info *info);
-+
- int get_stack_info(unsigned long *stack, struct task_struct *task,
-                  struct stack_info *info, unsigned long *visit_mask);
- 
-diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
-index 695cdce5dfc8..c211cbdff709 100644
---- a/arch/x86/kernel/dumpstack.c
-+++ b/arch/x86/kernel/dumpstack.c
-@@ -43,6 +43,25 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
-       return true;
- }
- 
-+bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
-+{
-+      struct tss_struct *tss = this_cpu_ptr(&cpu_tss);
-+
-+      /* Treat the canary as part of the stack for unwinding purposes. */
-+      void *begin = &tss->SYSENTER_stack_canary;
-+      void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack);
-+
-+      if ((void *)stack < begin || (void *)stack >= end)
-+              return false;
-+
-+      info->type      = STACK_TYPE_SYSENTER;
-+      info->begin     = begin;
-+      info->end       = end;
-+      info->next_sp   = NULL;
-+
-+      return true;
-+}
-+
- static void printk_stack_address(unsigned long address, int reliable,
-                                char *log_lvl)
- {
-diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
-index e5f0b40e66d2..3160bf2d100e 100644
---- a/arch/x86/kernel/dumpstack_32.c
-+++ b/arch/x86/kernel/dumpstack_32.c
-@@ -25,6 +25,9 @@ const char *stack_type_name(enum stack_type type)
-       if (type == STACK_TYPE_SOFTIRQ)
-               return "SOFTIRQ";
- 
-+      if (type == STACK_TYPE_SYSENTER)
-+              return "SYSENTER";
-+
-       return NULL;
- }
- 
-@@ -92,6 +95,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
-       if (task != current)
-               goto unknown;
- 
-+      if (in_sysenter_stack(stack, info))
-+              goto recursion_check;
-+
-       if (in_hardirq_stack(stack, info))
-               goto recursion_check;
- 
-diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
-index 3e1471d57487..f5107b659f86 100644
---- a/arch/x86/kernel/dumpstack_64.c
-+++ b/arch/x86/kernel/dumpstack_64.c
-@@ -36,6 +36,9 @@ const char *stack_type_name(enum stack_type type)
-       if (type == STACK_TYPE_IRQ)
-               return "IRQ";
- 
-+      if (type == STACK_TYPE_SYSENTER)
-+              return "SYSENTER";
-+
-       if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
-               return exception_stack_names[type - STACK_TYPE_EXCEPTION];
- 
-@@ -114,6 +117,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
-       if (in_irq_stack(stack, info))
-               goto recursion_check;
- 
-+      if (in_sysenter_stack(stack, info))
-+              goto recursion_check;
-+
-       goto unknown;
- 
- recursion_check:
--- 
-2.14.2
-
diff --git a/patches/kernel/0145-x86-entry-64-Allocate-and-enable-the-SYSENTER-stack.patch b/patches/kernel/0145-x86-entry-64-Allocate-and-enable-the-SYSENTER-stack.patch

new file mode 100644 (file)

index 0000000..8a0b80f
--- /dev/null
+++ b/patches/kernel/0145-x86-entry-64-Allocate-and-enable-the-SYSENTER-stack.patch
@@ -0,0 +1,182 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:12 +0100
+Subject: [PATCH] x86/entry/64: Allocate and enable the SYSENTER stack
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This will simplify future changes that want scratch variables early in
+the SYSENTER handler -- they'll be able to spill registers to the
+stack.  It also lets us get rid of a SWAPGS_UNSAFE_STACK user.
+
+This does not depend on CONFIG_IA32_EMULATION=y because we'll want the
+stack space even without IA32 emulation.
+
+As far as I can tell, the reason that this wasn't done from day 1 is
+that we use IST for #DB and #BP, which is IMO rather nasty and causes
+a lot more problems than it solves.  But, since #DB uses IST, we don't
+actually need a real stack for SYSENTER (because SYSENTER with TF set
+will invoke #DB on the IST stack rather than the SYSENTER stack).
+
+I want to remove IST usage from these vectors some day, and this patch
+is a prerequisite for that as well.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.312726423@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 1a79797b58cddfa948420a7553241c79c013e3ca)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 8e621515fa8d1649b031f34b9d498dcd865db1c3)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/processor.h | 3 ---
+ arch/x86/kernel/asm-offsets.c    | 5 +++++
+ arch/x86/kernel/asm-offsets_32.c | 5 -----
+ arch/x86/kernel/cpu/common.c     | 4 +++-
+ arch/x86/kernel/process.c        | 2 --
+ arch/x86/kernel/traps.c          | 3 +--
+ arch/x86/entry/entry_64_compat.S | 2 +-
+ 7 files changed, 10 insertions(+), 14 deletions(-)
+
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 79739e5f939a..5225917f9760 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -333,14 +333,11 @@ struct tss_struct {
+        */
+       unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
+ 
+-#ifdef CONFIG_X86_32
+       /*
+        * Space for the temporary SYSENTER stack.
+        */
+       unsigned long           SYSENTER_stack_canary;
+       unsigned long           SYSENTER_stack[64];
+-#endif
+-
+ } ____cacheline_aligned;
+ 
+ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
+index de827d6ac8c2..031bd35bd911 100644
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -92,4 +92,9 @@ void common(void) {
+ 
+       BLANK();
+       DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
++
++      /* Offset from cpu_tss to SYSENTER_stack */
++      OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
++      /* Size of SYSENTER_stack */
++      DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+ }
+diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
+index 880aa093268d..d09b161a3bd0 100644
+--- a/arch/x86/kernel/asm-offsets_32.c
++++ b/arch/x86/kernel/asm-offsets_32.c
+@@ -52,11 +52,6 @@ void foo(void)
+       DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
+              offsetofend(struct tss_struct, SYSENTER_stack));
+ 
+-      /* Offset from cpu_tss to SYSENTER_stack */
+-      OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
+-      /* Size of SYSENTER_stack */
+-      DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+-
+ #ifdef CONFIG_CC_STACKPROTECTOR
+       BLANK();
+       OFFSET(stack_canary_offset, stack_canary, canary);
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 121fe3570d6f..aa97e4cd3a33 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1362,7 +1362,9 @@ void syscall_init(void)
+        * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
+        */
+       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+-      wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
++      wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
++                  (unsigned long)this_cpu_ptr(&cpu_tss) +
++                  offsetofend(struct tss_struct, SYSENTER_stack));
+       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
+ #else
+       wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
+index ccf3a4f4ef68..aa86e810fb54 100644
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -70,9 +70,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+         */
+       .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
+ #endif
+-#ifdef CONFIG_X86_32
+       .SYSENTER_stack_canary  = STACK_END_MAGIC,
+-#endif
+ };
+ EXPORT_PER_CPU_SYMBOL(cpu_tss);
+ 
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index 3a46cab2696e..7b1d0df624cf 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -806,14 +806,13 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
+       debug_stack_usage_dec();
+ 
+ exit:
+-#if defined(CONFIG_X86_32)
+       /*
+        * This is the most likely code path that involves non-trivial use
+        * of the SYSENTER stack.  Check that we haven't overrun it.
+        */
+       WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
+            "Overran or corrupted SYSENTER stack\n");
+-#endif
++
+       ist_exit(regs);
+ }
+ NOKPROBE_SYMBOL(do_debug);
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index be745b7a3e3e..1f76b66518ee 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -47,7 +47,7 @@
+  */
+ ENTRY(entry_SYSENTER_compat)
+       /* Interrupts are off on entry. */
+-      SWAPGS_UNSAFE_STACK
++      SWAPGS
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ 
+       /*
+-- 
+2.14.2
+
diff --git a/patches/kernel/0146-x86-dumpstack-Add-get_stack_info-support-for-the-SYS.patch b/patches/kernel/0146-x86-dumpstack-Add-get_stack_info-support-for-the-SYS.patch

new file mode 100644 (file)

index 0000000..fcf8851
--- /dev/null
+++ b/patches/kernel/0146-x86-dumpstack-Add-get_stack_info-support-for-the-SYS.patch
@@ -0,0 +1,184 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:13 +0100
+Subject: [PATCH] x86/dumpstack: Add get_stack_info() support for the SYSENTER
+ stack
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+get_stack_info() doesn't currently know about the SYSENTER stack, so
+unwinding will fail if we entered the kernel on the SYSENTER stack
+and haven't fully switched off.  Teach get_stack_info() about the
+SYSENTER stack.
+
+With future patches applied that run part of the entry code on the
+SYSENTER stack and introduce an intentional BUG(), I would get:
+
+  PANIC: double fault, error_code: 0x0
+  ...
+  RIP: 0010:do_error_trap+0x33/0x1c0
+  ...
+  Call Trace:
+  Code: ...
+
+With this patch, I get:
+
+  PANIC: double fault, error_code: 0x0
+  ...
+  Call Trace:
+   <SYSENTER>
+   ? async_page_fault+0x36/0x60
+   ? invalid_op+0x22/0x40
+   ? async_page_fault+0x36/0x60
+   ? sync_regs+0x3c/0x40
+   ? sync_regs+0x2e/0x40
+   ? error_entry+0x6c/0xd0
+   ? async_page_fault+0x36/0x60
+   </SYSENTER>
+  Code: ...
+
+which is a lot more informative.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.392711508@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 33a2f1a6c4d7c0a02d1c006fb0379cc5ca3b96bb)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 72e90cc5463cf882c5f9508817029d85b317f2b5)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/stacktrace.h |  3 +++
+ arch/x86/kernel/dumpstack.c       | 19 +++++++++++++++++++
+ arch/x86/kernel/dumpstack_32.c    |  6 ++++++
+ arch/x86/kernel/dumpstack_64.c    |  6 ++++++
+ 4 files changed, 34 insertions(+)
+
+diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
+index 2e41c50ddf47..95f999576131 100644
+--- a/arch/x86/include/asm/stacktrace.h
++++ b/arch/x86/include/asm/stacktrace.h
+@@ -15,6 +15,7 @@ enum stack_type {
+       STACK_TYPE_TASK,
+       STACK_TYPE_IRQ,
+       STACK_TYPE_SOFTIRQ,
++      STACK_TYPE_SYSENTER,
+       STACK_TYPE_EXCEPTION,
+       STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
+ };
+@@ -27,6 +28,8 @@ struct stack_info {
+ bool in_task_stack(unsigned long *stack, struct task_struct *task,
+                  struct stack_info *info);
+ 
++bool in_sysenter_stack(unsigned long *stack, struct stack_info *info);
++
+ int get_stack_info(unsigned long *stack, struct task_struct *task,
+                  struct stack_info *info, unsigned long *visit_mask);
+ 
+diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
+index 695cdce5dfc8..c211cbdff709 100644
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -43,6 +43,25 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
+       return true;
+ }
+ 
++bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
++{
++      struct tss_struct *tss = this_cpu_ptr(&cpu_tss);
++
++      /* Treat the canary as part of the stack for unwinding purposes. */
++      void *begin = &tss->SYSENTER_stack_canary;
++      void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack);
++
++      if ((void *)stack < begin || (void *)stack >= end)
++              return false;
++
++      info->type      = STACK_TYPE_SYSENTER;
++      info->begin     = begin;
++      info->end       = end;
++      info->next_sp   = NULL;
++
++      return true;
++}
++
+ static void printk_stack_address(unsigned long address, int reliable,
+                                char *log_lvl)
+ {
+diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
+index e5f0b40e66d2..3160bf2d100e 100644
+--- a/arch/x86/kernel/dumpstack_32.c
++++ b/arch/x86/kernel/dumpstack_32.c
+@@ -25,6 +25,9 @@ const char *stack_type_name(enum stack_type type)
+       if (type == STACK_TYPE_SOFTIRQ)
+               return "SOFTIRQ";
+ 
++      if (type == STACK_TYPE_SYSENTER)
++              return "SYSENTER";
++
+       return NULL;
+ }
+ 
+@@ -92,6 +95,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
+       if (task != current)
+               goto unknown;
+ 
++      if (in_sysenter_stack(stack, info))
++              goto recursion_check;
++
+       if (in_hardirq_stack(stack, info))
+               goto recursion_check;
+ 
+diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
+index 3e1471d57487..f5107b659f86 100644
+--- a/arch/x86/kernel/dumpstack_64.c
++++ b/arch/x86/kernel/dumpstack_64.c
+@@ -36,6 +36,9 @@ const char *stack_type_name(enum stack_type type)
+       if (type == STACK_TYPE_IRQ)
+               return "IRQ";
+ 
++      if (type == STACK_TYPE_SYSENTER)
++              return "SYSENTER";
++
+       if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
+               return exception_stack_names[type - STACK_TYPE_EXCEPTION];
+ 
+@@ -114,6 +117,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
+       if (in_irq_stack(stack, info))
+               goto recursion_check;
+ 
++      if (in_sysenter_stack(stack, info))
++              goto recursion_check;
++
+       goto unknown;
+ 
+ recursion_check:
+-- 
+2.14.2
+
diff --git a/patches/kernel/0146-x86-entry-gdt-Put-per-CPU-GDT-remaps-in-ascending-or.patch b/patches/kernel/0146-x86-entry-gdt-Put-per-CPU-GDT-remaps-in-ascending-or.patch

deleted file mode 100644 (file)

index 91f4090..0000000
--- a/patches/kernel/0146-x86-entry-gdt-Put-per-CPU-GDT-remaps-in-ascending-or.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:14 +0100
-Subject: [PATCH] x86/entry/gdt: Put per-CPU GDT remaps in ascending order
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-We currently have CPU 0's GDT at the top of the GDT range and
-higher-numbered CPUs at lower addresses.  This happens because the
-fixmap is upside down (index 0 is the top of the fixmap).
-
-Flip it so that GDTs are in ascending order by virtual address.
-This will simplify a future patch that will generalize the GDT
-remap to contain multiple pages.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150605.471561421@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit aaeed3aeb39c1ba69f0a49baec8cb728121d0a91)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 9c37967fad2d6a525df53e0a40edcd652e5abaae)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/desc.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
-index f995e5a09136..22ee0a93b4f7 100644
---- a/arch/x86/include/asm/desc.h
-+++ b/arch/x86/include/asm/desc.h
-@@ -61,7 +61,7 @@ static inline struct desc_struct *get_current_gdt_rw(void)
- /* Get the fixmap index for a specific processor */
- static inline unsigned int get_cpu_gdt_ro_index(int cpu)
- {
--      return FIX_GDT_REMAP_BEGIN + cpu;
-+      return FIX_GDT_REMAP_END - cpu;
- }
- 
- /* Provide the fixmap address of the remapped GDT */
--- 
-2.14.2
-
diff --git a/patches/kernel/0147-x86-entry-gdt-Put-per-CPU-GDT-remaps-in-ascending-or.patch b/patches/kernel/0147-x86-entry-gdt-Put-per-CPU-GDT-remaps-in-ascending-or.patch

new file mode 100644 (file)

index 0000000..91f4090
--- /dev/null
+++ b/patches/kernel/0147-x86-entry-gdt-Put-per-CPU-GDT-remaps-in-ascending-or.patch
@@ -0,0 +1,70 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:14 +0100
+Subject: [PATCH] x86/entry/gdt: Put per-CPU GDT remaps in ascending order
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+We currently have CPU 0's GDT at the top of the GDT range and
+higher-numbered CPUs at lower addresses.  This happens because the
+fixmap is upside down (index 0 is the top of the fixmap).
+
+Flip it so that GDTs are in ascending order by virtual address.
+This will simplify a future patch that will generalize the GDT
+remap to contain multiple pages.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.471561421@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit aaeed3aeb39c1ba69f0a49baec8cb728121d0a91)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 9c37967fad2d6a525df53e0a40edcd652e5abaae)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/desc.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
+index f995e5a09136..22ee0a93b4f7 100644
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -61,7 +61,7 @@ static inline struct desc_struct *get_current_gdt_rw(void)
+ /* Get the fixmap index for a specific processor */
+ static inline unsigned int get_cpu_gdt_ro_index(int cpu)
+ {
+-      return FIX_GDT_REMAP_BEGIN + cpu;
++      return FIX_GDT_REMAP_END - cpu;
+ }
+ 
+ /* Provide the fixmap address of the remapped GDT */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0147-x86-mm-fixmap-Generalize-the-GDT-fixmap-mechanism-in.patch b/patches/kernel/0147-x86-mm-fixmap-Generalize-the-GDT-fixmap-mechanism-in.patch

deleted file mode 100644 (file)

index 7c6edcb..0000000
--- a/patches/kernel/0147-x86-mm-fixmap-Generalize-the-GDT-fixmap-mechanism-in.patch
+++ /dev/null
@@ -1,206 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:15 +0100
-Subject: [PATCH] x86/mm/fixmap: Generalize the GDT fixmap mechanism, introduce
- struct cpu_entry_area
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Currently, the GDT is an ad-hoc array of pages, one per CPU, in the
-fixmap.  Generalize it to be an array of a new 'struct cpu_entry_area'
-so that we can cleanly add new things to it.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150605.563271721@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit ef8813ab280507972bb57e4b1b502811ad4411e9)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit b17894f1ac91491ce29946ed946a129620b7f7ac)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/desc.h   |  9 +--------
- arch/x86/include/asm/fixmap.h | 37 +++++++++++++++++++++++++++++++++++--
- arch/x86/kernel/cpu/common.c  | 14 +++++++-------
- arch/x86/xen/mmu_pv.c         |  2 +-
- 4 files changed, 44 insertions(+), 18 deletions(-)
-
-diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
-index 22ee0a93b4f7..81c9b1e8cae9 100644
---- a/arch/x86/include/asm/desc.h
-+++ b/arch/x86/include/asm/desc.h
-@@ -58,17 +58,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
-       return this_cpu_ptr(&gdt_page)->gdt;
- }
- 
--/* Get the fixmap index for a specific processor */
--static inline unsigned int get_cpu_gdt_ro_index(int cpu)
--{
--      return FIX_GDT_REMAP_END - cpu;
--}
--
- /* Provide the fixmap address of the remapped GDT */
- static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
- {
--      unsigned int idx = get_cpu_gdt_ro_index(cpu);
--      return (struct desc_struct *)__fix_to_virt(idx);
-+      return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
- }
- 
- /* Provide the current read-only GDT */
-diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
-index 81c2b11f50a6..8c6ed66fe957 100644
---- a/arch/x86/include/asm/fixmap.h
-+++ b/arch/x86/include/asm/fixmap.h
-@@ -44,6 +44,19 @@ extern unsigned long __FIXADDR_TOP;
-                        PAGE_SIZE)
- #endif
- 
-+/*
-+ * cpu_entry_area is a percpu region in the fixmap that contains things
-+ * needed by the CPU and early entry/exit code.  Real types aren't used
-+ * for all fields here to avoid circular header dependencies.
-+ *
-+ * Every field is a virtual alias of some other allocated backing store.
-+ * There is no direct allocation of a struct cpu_entry_area.
-+ */
-+struct cpu_entry_area {
-+      char gdt[PAGE_SIZE];
-+};
-+
-+#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
- 
- /*
-  * Here we define all the compile-time 'special' virtual
-@@ -101,8 +114,8 @@ enum fixed_addresses {
-       FIX_LNW_VRTC,
- #endif
-       /* Fixmap entries to remap the GDTs, one per processor. */
--      FIX_GDT_REMAP_BEGIN,
--      FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
-+      FIX_CPU_ENTRY_AREA_TOP,
-+      FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
- 
- #ifdef CONFIG_ACPI_APEI_GHES
-       /* Used for GHES mapping from assorted contexts */
-@@ -171,5 +184,25 @@ static inline void __set_fixmap(enum fixed_addresses idx,
- void __early_set_fixmap(enum fixed_addresses idx,
-                       phys_addr_t phys, pgprot_t flags);
- 
-+static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
-+{
-+      BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
-+
-+      return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
-+}
-+
-+#define __get_cpu_entry_area_offset_index(cpu, offset) ({             \
-+      BUILD_BUG_ON(offset % PAGE_SIZE != 0);                          \
-+      __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE);       \
-+      })
-+
-+#define get_cpu_entry_area_index(cpu, field)                          \
-+      __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
-+
-+static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
-+{
-+      return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
-+}
-+
- #endif /* !__ASSEMBLY__ */
- #endif /* _ASM_X86_FIXMAP_H */
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index aa97e4cd3a33..ffee73ec1af1 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -466,12 +466,12 @@ void load_percpu_segment(int cpu)
-       load_stack_canary_segment();
- }
- 
--/* Setup the fixmap mapping only once per-processor */
--static inline void setup_fixmap_gdt(int cpu)
-+/* Setup the fixmap mappings only once per-processor */
-+static inline void setup_cpu_entry_area(int cpu)
- {
- #ifdef CONFIG_X86_64
-       /* On 64-bit systems, we use a read-only fixmap GDT. */
--      pgprot_t prot = PAGE_KERNEL_RO;
-+      pgprot_t gdt_prot = PAGE_KERNEL_RO;
- #else
-       /*
-        * On native 32-bit systems, the GDT cannot be read-only because
-@@ -482,11 +482,11 @@ static inline void setup_fixmap_gdt(int cpu)
-        * On Xen PV, the GDT must be read-only because the hypervisor requires
-        * it.
-        */
--      pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
-+      pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
-               PAGE_KERNEL_RO : PAGE_KERNEL;
- #endif
- 
--      __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
-+      __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
- }
- 
- /* Load the original GDT from the per-cpu structure */
-@@ -1589,7 +1589,7 @@ void cpu_init(void)
-       if (is_uv_system())
-               uv_cpu_init();
- 
--      setup_fixmap_gdt(cpu);
-+      setup_cpu_entry_area(cpu);
-       load_fixmap_gdt(cpu);
- }
- 
-@@ -1650,7 +1650,7 @@ void cpu_init(void)
- 
-       fpu__init_cpu();
- 
--      setup_fixmap_gdt(cpu);
-+      setup_cpu_entry_area(cpu);
-       load_fixmap_gdt(cpu);
- }
- #endif
-diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
-index 45bb2d462e44..53e65f605bdd 100644
---- a/arch/x86/xen/mmu_pv.c
-+++ b/arch/x86/xen/mmu_pv.c
-@@ -2297,7 +2297,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
- #endif
-       case FIX_TEXT_POKE0:
-       case FIX_TEXT_POKE1:
--      case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
-+      case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
-               /* All local page mappings */
-               pte = pfn_pte(phys, prot);
-               break;
--- 
-2.14.2
-
diff --git a/patches/kernel/0148-x86-kasan-64-Teach-KASAN-about-the-cpu_entry_area.patch b/patches/kernel/0148-x86-kasan-64-Teach-KASAN-about-the-cpu_entry_area.patch

deleted file mode 100644 (file)

index d7de3f3..0000000
--- a/patches/kernel/0148-x86-kasan-64-Teach-KASAN-about-the-cpu_entry_area.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:16 +0100
-Subject: [PATCH] x86/kasan/64: Teach KASAN about the cpu_entry_area
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The cpu_entry_area will contain stacks.  Make sure that KASAN has
-appropriate shadow mappings for them.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Alexander Potapenko <glider@google.com>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Dmitry Vyukov <dvyukov@google.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: kasan-dev@googlegroups.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150605.642806442@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 21506525fb8ddb0342f2a2370812d47f6a1f3833)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 17833d4cfca7e4284f68fb9f3804a91f2541a83a)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/kasan_init_64.c | 18 +++++++++++++++++-
- 1 file changed, 17 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
-index 3d7341986e13..d8836e45bc07 100644
---- a/arch/x86/mm/kasan_init_64.c
-+++ b/arch/x86/mm/kasan_init_64.c
-@@ -276,6 +276,7 @@ void __init kasan_early_init(void)
- void __init kasan_init(void)
- {
-       int i;
-+      void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
- 
- #ifdef CONFIG_KASAN_INLINE
-       register_die_notifier(&kasan_die_notifier);
-@@ -328,8 +329,23 @@ void __init kasan_init(void)
-                             (unsigned long)kasan_mem_to_shadow(_end),
-                             early_pfn_to_nid(__pa(_stext)));
- 
-+      shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
-+      shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
-+      shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
-+                                              PAGE_SIZE);
-+
-+      shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
-+      shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
-+      shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
-+                                      PAGE_SIZE);
-+
-       kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
--                      (void *)KASAN_SHADOW_END);
-+                                 shadow_cpu_entry_begin);
-+
-+      kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
-+                            (unsigned long)shadow_cpu_entry_end, 0);
-+
-+      kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END);
- 
-       load_cr3(init_top_pgt);
-       __flush_tlb_all();
--- 
-2.14.2
-
diff --git a/patches/kernel/0148-x86-mm-fixmap-Generalize-the-GDT-fixmap-mechanism-in.patch b/patches/kernel/0148-x86-mm-fixmap-Generalize-the-GDT-fixmap-mechanism-in.patch

new file mode 100644 (file)

index 0000000..7c6edcb
--- /dev/null
+++ b/patches/kernel/0148-x86-mm-fixmap-Generalize-the-GDT-fixmap-mechanism-in.patch
@@ -0,0 +1,206 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:15 +0100
+Subject: [PATCH] x86/mm/fixmap: Generalize the GDT fixmap mechanism, introduce
+ struct cpu_entry_area
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Currently, the GDT is an ad-hoc array of pages, one per CPU, in the
+fixmap.  Generalize it to be an array of a new 'struct cpu_entry_area'
+so that we can cleanly add new things to it.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.563271721@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit ef8813ab280507972bb57e4b1b502811ad4411e9)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit b17894f1ac91491ce29946ed946a129620b7f7ac)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/desc.h   |  9 +--------
+ arch/x86/include/asm/fixmap.h | 37 +++++++++++++++++++++++++++++++++++--
+ arch/x86/kernel/cpu/common.c  | 14 +++++++-------
+ arch/x86/xen/mmu_pv.c         |  2 +-
+ 4 files changed, 44 insertions(+), 18 deletions(-)
+
+diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
+index 22ee0a93b4f7..81c9b1e8cae9 100644
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -58,17 +58,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
+       return this_cpu_ptr(&gdt_page)->gdt;
+ }
+ 
+-/* Get the fixmap index for a specific processor */
+-static inline unsigned int get_cpu_gdt_ro_index(int cpu)
+-{
+-      return FIX_GDT_REMAP_END - cpu;
+-}
+-
+ /* Provide the fixmap address of the remapped GDT */
+ static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
+ {
+-      unsigned int idx = get_cpu_gdt_ro_index(cpu);
+-      return (struct desc_struct *)__fix_to_virt(idx);
++      return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
+ }
+ 
+ /* Provide the current read-only GDT */
+diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
+index 81c2b11f50a6..8c6ed66fe957 100644
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -44,6 +44,19 @@ extern unsigned long __FIXADDR_TOP;
+                        PAGE_SIZE)
+ #endif
+ 
++/*
++ * cpu_entry_area is a percpu region in the fixmap that contains things
++ * needed by the CPU and early entry/exit code.  Real types aren't used
++ * for all fields here to avoid circular header dependencies.
++ *
++ * Every field is a virtual alias of some other allocated backing store.
++ * There is no direct allocation of a struct cpu_entry_area.
++ */
++struct cpu_entry_area {
++      char gdt[PAGE_SIZE];
++};
++
++#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
+ 
+ /*
+  * Here we define all the compile-time 'special' virtual
+@@ -101,8 +114,8 @@ enum fixed_addresses {
+       FIX_LNW_VRTC,
+ #endif
+       /* Fixmap entries to remap the GDTs, one per processor. */
+-      FIX_GDT_REMAP_BEGIN,
+-      FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
++      FIX_CPU_ENTRY_AREA_TOP,
++      FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
+ 
+ #ifdef CONFIG_ACPI_APEI_GHES
+       /* Used for GHES mapping from assorted contexts */
+@@ -171,5 +184,25 @@ static inline void __set_fixmap(enum fixed_addresses idx,
+ void __early_set_fixmap(enum fixed_addresses idx,
+                       phys_addr_t phys, pgprot_t flags);
+ 
++static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
++{
++      BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
++
++      return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
++}
++
++#define __get_cpu_entry_area_offset_index(cpu, offset) ({             \
++      BUILD_BUG_ON(offset % PAGE_SIZE != 0);                          \
++      __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE);       \
++      })
++
++#define get_cpu_entry_area_index(cpu, field)                          \
++      __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
++
++static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
++{
++      return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
++}
++
+ #endif /* !__ASSEMBLY__ */
+ #endif /* _ASM_X86_FIXMAP_H */
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index aa97e4cd3a33..ffee73ec1af1 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -466,12 +466,12 @@ void load_percpu_segment(int cpu)
+       load_stack_canary_segment();
+ }
+ 
+-/* Setup the fixmap mapping only once per-processor */
+-static inline void setup_fixmap_gdt(int cpu)
++/* Setup the fixmap mappings only once per-processor */
++static inline void setup_cpu_entry_area(int cpu)
+ {
+ #ifdef CONFIG_X86_64
+       /* On 64-bit systems, we use a read-only fixmap GDT. */
+-      pgprot_t prot = PAGE_KERNEL_RO;
++      pgprot_t gdt_prot = PAGE_KERNEL_RO;
+ #else
+       /*
+        * On native 32-bit systems, the GDT cannot be read-only because
+@@ -482,11 +482,11 @@ static inline void setup_fixmap_gdt(int cpu)
+        * On Xen PV, the GDT must be read-only because the hypervisor requires
+        * it.
+        */
+-      pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
++      pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+               PAGE_KERNEL_RO : PAGE_KERNEL;
+ #endif
+ 
+-      __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
++      __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+ }
+ 
+ /* Load the original GDT from the per-cpu structure */
+@@ -1589,7 +1589,7 @@ void cpu_init(void)
+       if (is_uv_system())
+               uv_cpu_init();
+ 
+-      setup_fixmap_gdt(cpu);
++      setup_cpu_entry_area(cpu);
+       load_fixmap_gdt(cpu);
+ }
+ 
+@@ -1650,7 +1650,7 @@ void cpu_init(void)
+ 
+       fpu__init_cpu();
+ 
+-      setup_fixmap_gdt(cpu);
++      setup_cpu_entry_area(cpu);
+       load_fixmap_gdt(cpu);
+ }
+ #endif
+diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
+index 45bb2d462e44..53e65f605bdd 100644
+--- a/arch/x86/xen/mmu_pv.c
++++ b/arch/x86/xen/mmu_pv.c
+@@ -2297,7 +2297,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
+ #endif
+       case FIX_TEXT_POKE0:
+       case FIX_TEXT_POKE1:
+-      case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
++      case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
+               /* All local page mappings */
+               pte = pfn_pte(phys, prot);
+               break;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0149-x86-entry-Fix-assumptions-that-the-HW-TSS-is-at-the-.patch b/patches/kernel/0149-x86-entry-Fix-assumptions-that-the-HW-TSS-is-at-the-.patch

deleted file mode 100644 (file)

index 0c46932..0000000
--- a/patches/kernel/0149-x86-entry-Fix-assumptions-that-the-HW-TSS-is-at-the-.patch
+++ /dev/null
@@ -1,227 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:17 +0100
-Subject: [PATCH] x86/entry: Fix assumptions that the HW TSS is at the
- beginning of cpu_tss
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-A future patch will move SYSENTER_stack to the beginning of cpu_tss
-to help detect overflow.  Before this can happen, fix several code
-paths that hardcode assumptions about the old layout.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Reviewed-by: Dave Hansen <dave.hansen@intel.com>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150605.722425540@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit 7fb983b4dd569e08564134a850dfd4eb1c63d9b8)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 7123a5de72dc59dea18ce8886e7db726f7259caf)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/desc.h      |  2 +-
- arch/x86/include/asm/processor.h |  9 +++++++--
- arch/x86/kernel/cpu/common.c     |  8 ++++----
- arch/x86/kernel/doublefault.c    | 36 +++++++++++++++++-------------------
- arch/x86/kvm/vmx.c               |  2 +-
- arch/x86/power/cpu.c             | 13 +++++++------
- 6 files changed, 37 insertions(+), 33 deletions(-)
-
-diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
-index 81c9b1e8cae9..b817fe247506 100644
---- a/arch/x86/include/asm/desc.h
-+++ b/arch/x86/include/asm/desc.h
-@@ -190,7 +190,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
- #endif
- }
- 
--static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
-+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
- {
-       struct desc_struct *d = get_cpu_gdt_rw(cpu);
-       tss_desc tss;
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index 5225917f9760..78123abdb046 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -161,7 +161,7 @@ extern struct cpuinfo_x86  new_cpu_data;
- 
- #include <linux/thread_info.h>
- 
--extern struct tss_struct      doublefault_tss;
-+extern struct x86_hw_tss      doublefault_tss;
- extern __u32                  cpu_caps_cleared[NCAPINTS];
- extern __u32                  cpu_caps_set[NCAPINTS];
- 
-@@ -246,6 +246,11 @@ static inline void load_cr3(pgd_t *pgdir)
-       write_cr3(__pa(pgdir));
- }
- 
-+/*
-+ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
-+ * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
-+ * unrelated to the task-switch mechanism:
-+ */
- #ifdef CONFIG_X86_32
- /* This is the TSS defined by the hardware. */
- struct x86_hw_tss {
-@@ -316,7 +321,7 @@ struct x86_hw_tss {
- #define IO_BITMAP_BITS                        65536
- #define IO_BITMAP_BYTES                       (IO_BITMAP_BITS/8)
- #define IO_BITMAP_LONGS                       (IO_BITMAP_BYTES/sizeof(long))
--#define IO_BITMAP_OFFSET              offsetof(struct tss_struct, io_bitmap)
-+#define IO_BITMAP_OFFSET              (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
- #define INVALID_IO_BITMAP_OFFSET      0x8000
- 
- struct tss_struct {
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index ffee73ec1af1..e526d82b546c 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -1558,7 +1558,7 @@ void cpu_init(void)
-               }
-       }
- 
--      t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
-+      t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
- 
-       /*
-        * <= is required because the CPU will access up to
-@@ -1576,7 +1576,7 @@ void cpu_init(void)
-        * Initialize the TSS.  Don't bother initializing sp0, as the initial
-        * task never enters user mode.
-        */
--      set_tss_desc(cpu, t);
-+      set_tss_desc(cpu, &t->x86_tss);
-       load_TR_desc();
- 
-       load_mm_ldt(&init_mm);
-@@ -1633,12 +1633,12 @@ void cpu_init(void)
-        * Initialize the TSS.  Don't bother initializing sp0, as the initial
-        * task never enters user mode.
-        */
--      set_tss_desc(cpu, t);
-+      set_tss_desc(cpu, &t->x86_tss);
-       load_TR_desc();
- 
-       load_mm_ldt(&init_mm);
- 
--      t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
-+      t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
- 
- #ifdef CONFIG_DOUBLEFAULT
-       /* Set up doublefault TSS pointer in the GDT */
-diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
-index f9c324e08d85..a9fe79d49d39 100644
---- a/arch/x86/kernel/doublefault.c
-+++ b/arch/x86/kernel/doublefault.c
-@@ -49,25 +49,23 @@ static void doublefault_fn(void)
-               cpu_relax();
- }
- 
--struct tss_struct doublefault_tss __cacheline_aligned = {
--      .x86_tss = {
--              .sp0            = STACK_START,
--              .ss0            = __KERNEL_DS,
--              .ldt            = 0,
--              .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
--
--              .ip             = (unsigned long) doublefault_fn,
--              /* 0x2 bit is always set */
--              .flags          = X86_EFLAGS_SF | 0x2,
--              .sp             = STACK_START,
--              .es             = __USER_DS,
--              .cs             = __KERNEL_CS,
--              .ss             = __KERNEL_DS,
--              .ds             = __USER_DS,
--              .fs             = __KERNEL_PERCPU,
--
--              .__cr3          = __pa_nodebug(swapper_pg_dir),
--      }
-+struct x86_hw_tss doublefault_tss __cacheline_aligned = {
-+      .sp0            = STACK_START,
-+      .ss0            = __KERNEL_DS,
-+      .ldt            = 0,
-+      .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
-+
-+      .ip             = (unsigned long) doublefault_fn,
-+      /* 0x2 bit is always set */
-+      .flags          = X86_EFLAGS_SF | 0x2,
-+      .sp             = STACK_START,
-+      .es             = __USER_DS,
-+      .cs             = __KERNEL_CS,
-+      .ss             = __KERNEL_DS,
-+      .ds             = __USER_DS,
-+      .fs             = __KERNEL_PERCPU,
-+
-+      .__cr3          = __pa_nodebug(swapper_pg_dir),
- };
- 
- /* dummy for do_double_fault() call */
-diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
-index dd4996a96c71..a7c5a47beab7 100644
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -2280,7 +2280,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-                * processors.  See 22.2.4.
-                */
-               vmcs_writel(HOST_TR_BASE,
--                          (unsigned long)this_cpu_ptr(&cpu_tss));
-+                          (unsigned long)this_cpu_ptr(&cpu_tss.x86_tss));
-               vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
- 
-               /*
-diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
-index 78459a6d455a..48cd87fc7222 100644
---- a/arch/x86/power/cpu.c
-+++ b/arch/x86/power/cpu.c
-@@ -165,12 +165,13 @@ static void fix_processor_context(void)
-       struct desc_struct *desc = get_cpu_gdt_rw(cpu);
-       tss_desc tss;
- #endif
--      set_tss_desc(cpu, t);   /*
--                               * This just modifies memory; should not be
--                               * necessary. But... This is necessary, because
--                               * 386 hardware has concept of busy TSS or some
--                               * similar stupidity.
--                               */
-+
-+      /*
-+       * This just modifies memory; should not be necessary. But... This is
-+       * necessary, because 386 hardware has concept of busy TSS or some
-+       * similar stupidity.
-+       */
-+      set_tss_desc(cpu, &t->x86_tss);
- 
- #ifdef CONFIG_X86_64
-       memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
--- 
-2.14.2
-
diff --git a/patches/kernel/0149-x86-kasan-64-Teach-KASAN-about-the-cpu_entry_area.patch b/patches/kernel/0149-x86-kasan-64-Teach-KASAN-about-the-cpu_entry_area.patch

new file mode 100644 (file)

index 0000000..d7de3f3
--- /dev/null
+++ b/patches/kernel/0149-x86-kasan-64-Teach-KASAN-about-the-cpu_entry_area.patch
@@ -0,0 +1,91 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:16 +0100
+Subject: [PATCH] x86/kasan/64: Teach KASAN about the cpu_entry_area
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The cpu_entry_area will contain stacks.  Make sure that KASAN has
+appropriate shadow mappings for them.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: kasan-dev@googlegroups.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.642806442@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 21506525fb8ddb0342f2a2370812d47f6a1f3833)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 17833d4cfca7e4284f68fb9f3804a91f2541a83a)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/kasan_init_64.c | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
+index 3d7341986e13..d8836e45bc07 100644
+--- a/arch/x86/mm/kasan_init_64.c
++++ b/arch/x86/mm/kasan_init_64.c
+@@ -276,6 +276,7 @@ void __init kasan_early_init(void)
+ void __init kasan_init(void)
+ {
+       int i;
++      void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
+ 
+ #ifdef CONFIG_KASAN_INLINE
+       register_die_notifier(&kasan_die_notifier);
+@@ -328,8 +329,23 @@ void __init kasan_init(void)
+                             (unsigned long)kasan_mem_to_shadow(_end),
+                             early_pfn_to_nid(__pa(_stext)));
+ 
++      shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
++      shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
++      shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
++                                              PAGE_SIZE);
++
++      shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
++      shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
++      shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
++                                      PAGE_SIZE);
++
+       kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
+-                      (void *)KASAN_SHADOW_END);
++                                 shadow_cpu_entry_begin);
++
++      kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
++                            (unsigned long)shadow_cpu_entry_end, 0);
++
++      kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END);
+ 
+       load_cr3(init_top_pgt);
+       __flush_tlb_all();
+-- 
+2.14.2
+
diff --git a/patches/kernel/0150-x86-dumpstack-Handle-stack-overflow-on-all-stacks.patch b/patches/kernel/0150-x86-dumpstack-Handle-stack-overflow-on-all-stacks.patch

deleted file mode 100644 (file)

index acd41d1..0000000
--- a/patches/kernel/0150-x86-dumpstack-Handle-stack-overflow-on-all-stacks.patch
+++ /dev/null
@@ -1,96 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:18 +0100
-Subject: [PATCH] x86/dumpstack: Handle stack overflow on all stacks
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-We currently special-case stack overflow on the task stack.  We're
-going to start putting special stacks in the fixmap with a custom
-layout, so they'll have guard pages, too.  Teach the unwinder to be
-able to unwind an overflow of any of the stacks.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150605.802057305@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 6e60e583426c2f8751c22c2dfe5c207083b4483a)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 1ab51120b9a5baaa46979e4ab8ff28916c9cb846)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/dumpstack.c | 24 ++++++++++++++----------
- 1 file changed, 14 insertions(+), 10 deletions(-)
-
-diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
-index c211cbdff709..0f4b931e1a02 100644
---- a/arch/x86/kernel/dumpstack.c
-+++ b/arch/x86/kernel/dumpstack.c
-@@ -112,24 +112,28 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-        * - task stack
-        * - interrupt stack
-        * - HW exception stacks (double fault, nmi, debug, mce)
-+       * - SYSENTER stack
-        *
--       * x86-32 can have up to three stacks:
-+       * x86-32 can have up to four stacks:
-        * - task stack
-        * - softirq stack
-        * - hardirq stack
-+       * - SYSENTER stack
-        */
-       for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
-               const char *stack_name;
- 
--              /*
--               * If we overflowed the task stack into a guard page, jump back
--               * to the bottom of the usable stack.
--               */
--              if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
--                      stack = task_stack_page(task);
--
--              if (get_stack_info(stack, task, &stack_info, &visit_mask))
--                      break;
-+              if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
-+                      /*
-+                       * We weren't on a valid stack.  It's possible that
-+                       * we overflowed a valid stack into a guard page.
-+                       * See if the next page up is valid so that we can
-+                       * generate some kind of backtrace if this happens.
-+                       */
-+                      stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
-+                      if (get_stack_info(stack, task, &stack_info, &visit_mask))
-+                              break;
-+              }
- 
-               stack_name = stack_type_name(stack_info.type);
-               if (stack_name)
--- 
-2.14.2
-
diff --git a/patches/kernel/0150-x86-entry-Fix-assumptions-that-the-HW-TSS-is-at-the-.patch b/patches/kernel/0150-x86-entry-Fix-assumptions-that-the-HW-TSS-is-at-the-.patch

new file mode 100644 (file)

index 0000000..0c46932
--- /dev/null
+++ b/patches/kernel/0150-x86-entry-Fix-assumptions-that-the-HW-TSS-is-at-the-.patch
@@ -0,0 +1,227 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:17 +0100
+Subject: [PATCH] x86/entry: Fix assumptions that the HW TSS is at the
+ beginning of cpu_tss
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+A future patch will move SYSENTER_stack to the beginning of cpu_tss
+to help detect overflow.  Before this can happen, fix several code
+paths that hardcode assumptions about the old layout.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Dave Hansen <dave.hansen@intel.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.722425540@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit 7fb983b4dd569e08564134a850dfd4eb1c63d9b8)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 7123a5de72dc59dea18ce8886e7db726f7259caf)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/desc.h      |  2 +-
+ arch/x86/include/asm/processor.h |  9 +++++++--
+ arch/x86/kernel/cpu/common.c     |  8 ++++----
+ arch/x86/kernel/doublefault.c    | 36 +++++++++++++++++-------------------
+ arch/x86/kvm/vmx.c               |  2 +-
+ arch/x86/power/cpu.c             | 13 +++++++------
+ 6 files changed, 37 insertions(+), 33 deletions(-)
+
+diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
+index 81c9b1e8cae9..b817fe247506 100644
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -190,7 +190,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
+ #endif
+ }
+ 
+-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
++static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
+ {
+       struct desc_struct *d = get_cpu_gdt_rw(cpu);
+       tss_desc tss;
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 5225917f9760..78123abdb046 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -161,7 +161,7 @@ extern struct cpuinfo_x86  new_cpu_data;
+ 
+ #include <linux/thread_info.h>
+ 
+-extern struct tss_struct      doublefault_tss;
++extern struct x86_hw_tss      doublefault_tss;
+ extern __u32                  cpu_caps_cleared[NCAPINTS];
+ extern __u32                  cpu_caps_set[NCAPINTS];
+ 
+@@ -246,6 +246,11 @@ static inline void load_cr3(pgd_t *pgdir)
+       write_cr3(__pa(pgdir));
+ }
+ 
++/*
++ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
++ * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
++ * unrelated to the task-switch mechanism:
++ */
+ #ifdef CONFIG_X86_32
+ /* This is the TSS defined by the hardware. */
+ struct x86_hw_tss {
+@@ -316,7 +321,7 @@ struct x86_hw_tss {
+ #define IO_BITMAP_BITS                        65536
+ #define IO_BITMAP_BYTES                       (IO_BITMAP_BITS/8)
+ #define IO_BITMAP_LONGS                       (IO_BITMAP_BYTES/sizeof(long))
+-#define IO_BITMAP_OFFSET              offsetof(struct tss_struct, io_bitmap)
++#define IO_BITMAP_OFFSET              (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
+ #define INVALID_IO_BITMAP_OFFSET      0x8000
+ 
+ struct tss_struct {
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index ffee73ec1af1..e526d82b546c 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1558,7 +1558,7 @@ void cpu_init(void)
+               }
+       }
+ 
+-      t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
++      t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
+ 
+       /*
+        * <= is required because the CPU will access up to
+@@ -1576,7 +1576,7 @@ void cpu_init(void)
+        * Initialize the TSS.  Don't bother initializing sp0, as the initial
+        * task never enters user mode.
+        */
+-      set_tss_desc(cpu, t);
++      set_tss_desc(cpu, &t->x86_tss);
+       load_TR_desc();
+ 
+       load_mm_ldt(&init_mm);
+@@ -1633,12 +1633,12 @@ void cpu_init(void)
+        * Initialize the TSS.  Don't bother initializing sp0, as the initial
+        * task never enters user mode.
+        */
+-      set_tss_desc(cpu, t);
++      set_tss_desc(cpu, &t->x86_tss);
+       load_TR_desc();
+ 
+       load_mm_ldt(&init_mm);
+ 
+-      t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
++      t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
+ 
+ #ifdef CONFIG_DOUBLEFAULT
+       /* Set up doublefault TSS pointer in the GDT */
+diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
+index f9c324e08d85..a9fe79d49d39 100644
+--- a/arch/x86/kernel/doublefault.c
++++ b/arch/x86/kernel/doublefault.c
+@@ -49,25 +49,23 @@ static void doublefault_fn(void)
+               cpu_relax();
+ }
+ 
+-struct tss_struct doublefault_tss __cacheline_aligned = {
+-      .x86_tss = {
+-              .sp0            = STACK_START,
+-              .ss0            = __KERNEL_DS,
+-              .ldt            = 0,
+-              .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+-
+-              .ip             = (unsigned long) doublefault_fn,
+-              /* 0x2 bit is always set */
+-              .flags          = X86_EFLAGS_SF | 0x2,
+-              .sp             = STACK_START,
+-              .es             = __USER_DS,
+-              .cs             = __KERNEL_CS,
+-              .ss             = __KERNEL_DS,
+-              .ds             = __USER_DS,
+-              .fs             = __KERNEL_PERCPU,
+-
+-              .__cr3          = __pa_nodebug(swapper_pg_dir),
+-      }
++struct x86_hw_tss doublefault_tss __cacheline_aligned = {
++      .sp0            = STACK_START,
++      .ss0            = __KERNEL_DS,
++      .ldt            = 0,
++      .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
++
++      .ip             = (unsigned long) doublefault_fn,
++      /* 0x2 bit is always set */
++      .flags          = X86_EFLAGS_SF | 0x2,
++      .sp             = STACK_START,
++      .es             = __USER_DS,
++      .cs             = __KERNEL_CS,
++      .ss             = __KERNEL_DS,
++      .ds             = __USER_DS,
++      .fs             = __KERNEL_PERCPU,
++
++      .__cr3          = __pa_nodebug(swapper_pg_dir),
+ };
+ 
+ /* dummy for do_double_fault() call */
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index dd4996a96c71..a7c5a47beab7 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2280,7 +2280,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+                * processors.  See 22.2.4.
+                */
+               vmcs_writel(HOST_TR_BASE,
+-                          (unsigned long)this_cpu_ptr(&cpu_tss));
++                          (unsigned long)this_cpu_ptr(&cpu_tss.x86_tss));
+               vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
+ 
+               /*
+diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
+index 78459a6d455a..48cd87fc7222 100644
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -165,12 +165,13 @@ static void fix_processor_context(void)
+       struct desc_struct *desc = get_cpu_gdt_rw(cpu);
+       tss_desc tss;
+ #endif
+-      set_tss_desc(cpu, t);   /*
+-                               * This just modifies memory; should not be
+-                               * necessary. But... This is necessary, because
+-                               * 386 hardware has concept of busy TSS or some
+-                               * similar stupidity.
+-                               */
++
++      /*
++       * This just modifies memory; should not be necessary. But... This is
++       * necessary, because 386 hardware has concept of busy TSS or some
++       * similar stupidity.
++       */
++      set_tss_desc(cpu, &t->x86_tss);
+ 
+ #ifdef CONFIG_X86_64
+       memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
+-- 
+2.14.2
+
diff --git a/patches/kernel/0151-x86-dumpstack-Handle-stack-overflow-on-all-stacks.patch b/patches/kernel/0151-x86-dumpstack-Handle-stack-overflow-on-all-stacks.patch

new file mode 100644 (file)

index 0000000..acd41d1
--- /dev/null
+++ b/patches/kernel/0151-x86-dumpstack-Handle-stack-overflow-on-all-stacks.patch
@@ -0,0 +1,96 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:18 +0100
+Subject: [PATCH] x86/dumpstack: Handle stack overflow on all stacks
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+We currently special-case stack overflow on the task stack.  We're
+going to start putting special stacks in the fixmap with a custom
+layout, so they'll have guard pages, too.  Teach the unwinder to be
+able to unwind an overflow of any of the stacks.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.802057305@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 6e60e583426c2f8751c22c2dfe5c207083b4483a)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 1ab51120b9a5baaa46979e4ab8ff28916c9cb846)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/dumpstack.c | 24 ++++++++++++++----------
+ 1 file changed, 14 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
+index c211cbdff709..0f4b931e1a02 100644
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -112,24 +112,28 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+        * - task stack
+        * - interrupt stack
+        * - HW exception stacks (double fault, nmi, debug, mce)
++       * - SYSENTER stack
+        *
+-       * x86-32 can have up to three stacks:
++       * x86-32 can have up to four stacks:
+        * - task stack
+        * - softirq stack
+        * - hardirq stack
++       * - SYSENTER stack
+        */
+       for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+               const char *stack_name;
+ 
+-              /*
+-               * If we overflowed the task stack into a guard page, jump back
+-               * to the bottom of the usable stack.
+-               */
+-              if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
+-                      stack = task_stack_page(task);
+-
+-              if (get_stack_info(stack, task, &stack_info, &visit_mask))
+-                      break;
++              if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
++                      /*
++                       * We weren't on a valid stack.  It's possible that
++                       * we overflowed a valid stack into a guard page.
++                       * See if the next page up is valid so that we can
++                       * generate some kind of backtrace if this happens.
++                       */
++                      stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
++                      if (get_stack_info(stack, task, &stack_info, &visit_mask))
++                              break;
++              }
+ 
+               stack_name = stack_type_name(stack_info.type);
+               if (stack_name)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0151-x86-entry-Move-SYSENTER_stack-to-the-beginning-of-st.patch b/patches/kernel/0151-x86-entry-Move-SYSENTER_stack-to-the-beginning-of-st.patch

deleted file mode 100644 (file)

index 84662ec..0000000
--- a/patches/kernel/0151-x86-entry-Move-SYSENTER_stack-to-the-beginning-of-st.patch
+++ /dev/null
@@ -1,130 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:19 +0100
-Subject: [PATCH] x86/entry: Move SYSENTER_stack to the beginning of struct
- tss_struct
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-SYSENTER_stack should have reliable overflow detection, which
-means that it needs to be at the bottom of a page, not the top.
-Move it to the beginning of struct tss_struct and page-align it.
-
-Also add an assertion to make sure that the fixed hardware TSS
-doesn't cross a page boundary.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150605.881827433@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 1a935bc3d4ea61556461a9e92a68ca3556232efd)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 57d6cfd9e7d015aabbed6d0b50e7d2525b3c86c2)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/processor.h | 21 ++++++++++++---------
- arch/x86/kernel/cpu/common.c     | 21 +++++++++++++++++++++
- 2 files changed, 33 insertions(+), 9 deletions(-)
-
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index 78123abdb046..55885465c3a7 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -326,7 +326,16 @@ struct x86_hw_tss {
- 
- struct tss_struct {
-       /*
--       * The hardware state:
-+       * Space for the temporary SYSENTER stack, used for SYSENTER
-+       * and the entry trampoline as well.
-+       */
-+      unsigned long           SYSENTER_stack_canary;
-+      unsigned long           SYSENTER_stack[64];
-+
-+      /*
-+       * The fixed hardware portion.  This must not cross a page boundary
-+       * at risk of violating the SDM's advice and potentially triggering
-+       * errata.
-        */
-       struct x86_hw_tss       x86_tss;
- 
-@@ -337,15 +346,9 @@ struct tss_struct {
-        * be within the limit.
-        */
-       unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
-+} __aligned(PAGE_SIZE);
- 
--      /*
--       * Space for the temporary SYSENTER stack.
--       */
--      unsigned long           SYSENTER_stack_canary;
--      unsigned long           SYSENTER_stack[64];
--} ____cacheline_aligned;
--
--DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
-+DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss);
- 
- /*
-  * sizeof(unsigned long) coming from an extra "long" at the end
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index e526d82b546c..e61eff11f562 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -487,6 +487,27 @@ static inline void setup_cpu_entry_area(int cpu)
- #endif
- 
-       __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
-+
-+      /*
-+       * The Intel SDM says (Volume 3, 7.2.1):
-+       *
-+       *  Avoid placing a page boundary in the part of the TSS that the
-+       *  processor reads during a task switch (the first 104 bytes). The
-+       *  processor may not correctly perform address translations if a
-+       *  boundary occurs in this area. During a task switch, the processor
-+       *  reads and writes into the first 104 bytes of each TSS (using
-+       *  contiguous physical addresses beginning with the physical address
-+       *  of the first byte of the TSS). So, after TSS access begins, if
-+       *  part of the 104 bytes is not physically contiguous, the processor
-+       *  will access incorrect information without generating a page-fault
-+       *  exception.
-+       *
-+       * There are also a lot of errata involving the TSS spanning a page
-+       * boundary.  Assert that we're not doing that.
-+       */
-+      BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
-+                    offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
-+
- }
- 
- /* Load the original GDT from the per-cpu structure */
--- 
-2.14.2
-
diff --git a/patches/kernel/0152-x86-entry-Move-SYSENTER_stack-to-the-beginning-of-st.patch b/patches/kernel/0152-x86-entry-Move-SYSENTER_stack-to-the-beginning-of-st.patch

new file mode 100644 (file)

index 0000000..84662ec
--- /dev/null
+++ b/patches/kernel/0152-x86-entry-Move-SYSENTER_stack-to-the-beginning-of-st.patch
@@ -0,0 +1,130 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:19 +0100
+Subject: [PATCH] x86/entry: Move SYSENTER_stack to the beginning of struct
+ tss_struct
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+SYSENTER_stack should have reliable overflow detection, which
+means that it needs to be at the bottom of a page, not the top.
+Move it to the beginning of struct tss_struct and page-align it.
+
+Also add an assertion to make sure that the fixed hardware TSS
+doesn't cross a page boundary.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.881827433@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 1a935bc3d4ea61556461a9e92a68ca3556232efd)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 57d6cfd9e7d015aabbed6d0b50e7d2525b3c86c2)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/processor.h | 21 ++++++++++++---------
+ arch/x86/kernel/cpu/common.c     | 21 +++++++++++++++++++++
+ 2 files changed, 33 insertions(+), 9 deletions(-)
+
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 78123abdb046..55885465c3a7 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -326,7 +326,16 @@ struct x86_hw_tss {
+ 
+ struct tss_struct {
+       /*
+-       * The hardware state:
++       * Space for the temporary SYSENTER stack, used for SYSENTER
++       * and the entry trampoline as well.
++       */
++      unsigned long           SYSENTER_stack_canary;
++      unsigned long           SYSENTER_stack[64];
++
++      /*
++       * The fixed hardware portion.  This must not cross a page boundary
++       * at risk of violating the SDM's advice and potentially triggering
++       * errata.
+        */
+       struct x86_hw_tss       x86_tss;
+ 
+@@ -337,15 +346,9 @@ struct tss_struct {
+        * be within the limit.
+        */
+       unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
++} __aligned(PAGE_SIZE);
+ 
+-      /*
+-       * Space for the temporary SYSENTER stack.
+-       */
+-      unsigned long           SYSENTER_stack_canary;
+-      unsigned long           SYSENTER_stack[64];
+-} ____cacheline_aligned;
+-
+-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
++DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss);
+ 
+ /*
+  * sizeof(unsigned long) coming from an extra "long" at the end
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index e526d82b546c..e61eff11f562 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -487,6 +487,27 @@ static inline void setup_cpu_entry_area(int cpu)
+ #endif
+ 
+       __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
++
++      /*
++       * The Intel SDM says (Volume 3, 7.2.1):
++       *
++       *  Avoid placing a page boundary in the part of the TSS that the
++       *  processor reads during a task switch (the first 104 bytes). The
++       *  processor may not correctly perform address translations if a
++       *  boundary occurs in this area. During a task switch, the processor
++       *  reads and writes into the first 104 bytes of each TSS (using
++       *  contiguous physical addresses beginning with the physical address
++       *  of the first byte of the TSS). So, after TSS access begins, if
++       *  part of the 104 bytes is not physically contiguous, the processor
++       *  will access incorrect information without generating a page-fault
++       *  exception.
++       *
++       * There are also a lot of errata involving the TSS spanning a page
++       * boundary.  Assert that we're not doing that.
++       */
++      BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
++                    offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
++
+ }
+ 
+ /* Load the original GDT from the per-cpu structure */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0152-x86-entry-Remap-the-TSS-into-the-CPU-entry-area.patch b/patches/kernel/0152-x86-entry-Remap-the-TSS-into-the-CPU-entry-area.patch

deleted file mode 100644 (file)

index b42d76d..0000000
--- a/patches/kernel/0152-x86-entry-Remap-the-TSS-into-the-CPU-entry-area.patch
+++ /dev/null
@@ -1,286 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:20 +0100
-Subject: [PATCH] x86/entry: Remap the TSS into the CPU entry area
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This has a secondary purpose: it puts the entry stack into a region
-with a well-controlled layout.  A subsequent patch will take
-advantage of this to streamline the SYSCALL entry code to be able to
-find it more easily.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bpetkov@suse.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150605.962042855@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 72f5e08dbba2d01aa90b592cf76c378ea233b00b)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 475b37e78defbc4cb91d54e2bcf18aa75611bb3a)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/fixmap.h |  7 +++++++
- arch/x86/kernel/asm-offsets.c |  3 +++
- arch/x86/kernel/cpu/common.c  | 41 +++++++++++++++++++++++++++++++++++------
- arch/x86/kernel/dumpstack.c   |  3 ++-
- arch/x86/kvm/vmx.c            |  2 +-
- arch/x86/power/cpu.c          | 11 ++++++-----
- arch/x86/entry/entry_32.S     |  6 ++++--
- 7 files changed, 58 insertions(+), 15 deletions(-)
-
-diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
-index 8c6ed66fe957..c92fc30e6def 100644
---- a/arch/x86/include/asm/fixmap.h
-+++ b/arch/x86/include/asm/fixmap.h
-@@ -54,6 +54,13 @@ extern unsigned long __FIXADDR_TOP;
-  */
- struct cpu_entry_area {
-       char gdt[PAGE_SIZE];
-+
-+      /*
-+       * The GDT is just below cpu_tss and thus serves (on x86_64) as a
-+       * a read-only guard page for the SYSENTER stack at the bottom
-+       * of the TSS region.
-+       */
-+      struct tss_struct tss;
- };
- 
- #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
-diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
-index 031bd35bd911..f765c3253ec3 100644
---- a/arch/x86/kernel/asm-offsets.c
-+++ b/arch/x86/kernel/asm-offsets.c
-@@ -97,4 +97,7 @@ void common(void) {
-       OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
-       /* Size of SYSENTER_stack */
-       DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
-+
-+      /* Layout info for cpu_entry_area */
-+      OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
- }
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index e61eff11f562..4a38de4c6ede 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -466,6 +466,22 @@ void load_percpu_segment(int cpu)
-       load_stack_canary_segment();
- }
- 
-+static void set_percpu_fixmap_pages(int fixmap_index, void *ptr,
-+                                  int pages, pgprot_t prot)
-+{
-+      int i;
-+
-+      for (i = 0; i < pages; i++) {
-+              __set_fixmap(fixmap_index - i,
-+                           per_cpu_ptr_to_phys(ptr + i * PAGE_SIZE), prot);
-+      }
-+}
-+
-+#ifdef CONFIG_X86_32
-+/* The 32-bit entry code needs to find cpu_entry_area. */
-+DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
-+#endif
-+
- /* Setup the fixmap mappings only once per-processor */
- static inline void setup_cpu_entry_area(int cpu)
- {
-@@ -507,7 +523,15 @@ static inline void setup_cpu_entry_area(int cpu)
-        */
-       BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
-                     offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
-+      BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
-+      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
-+                              &per_cpu(cpu_tss, cpu),
-+                              sizeof(struct tss_struct) / PAGE_SIZE,
-+                              PAGE_KERNEL);
- 
-+#ifdef CONFIG_X86_32
-+      this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu));
-+#endif
- }
- 
- /* Load the original GDT from the per-cpu structure */
-@@ -1249,7 +1273,8 @@ void enable_sep_cpu(void)
-       wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
- 
-       wrmsr(MSR_IA32_SYSENTER_ESP,
--            (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
-+            (unsigned long)&get_cpu_entry_area(cpu)->tss +
-+            offsetofend(struct tss_struct, SYSENTER_stack),
-             0);
- 
-       wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
-@@ -1371,6 +1396,8 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
- /* May not be marked __init: used by software suspend */
- void syscall_init(void)
- {
-+      int cpu = smp_processor_id();
-+
-       wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
-       wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
- 
-@@ -1384,7 +1411,7 @@ void syscall_init(void)
-        */
-       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-       wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
--                  (unsigned long)this_cpu_ptr(&cpu_tss) +
-+                  (unsigned long)&get_cpu_entry_area(cpu)->tss +
-                   offsetofend(struct tss_struct, SYSENTER_stack));
-       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
- #else
-@@ -1593,11 +1620,13 @@ void cpu_init(void)
-       BUG_ON(me->mm);
-       enter_lazy_tlb(&init_mm, me);
- 
-+      setup_cpu_entry_area(cpu);
-+
-       /*
-        * Initialize the TSS.  Don't bother initializing sp0, as the initial
-        * task never enters user mode.
-        */
--      set_tss_desc(cpu, &t->x86_tss);
-+      set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
-       load_TR_desc();
- 
-       load_mm_ldt(&init_mm);
-@@ -1610,7 +1639,6 @@ void cpu_init(void)
-       if (is_uv_system())
-               uv_cpu_init();
- 
--      setup_cpu_entry_area(cpu);
-       load_fixmap_gdt(cpu);
- }
- 
-@@ -1650,11 +1678,13 @@ void cpu_init(void)
-       BUG_ON(curr->mm);
-       enter_lazy_tlb(&init_mm, curr);
- 
-+      setup_cpu_entry_area(cpu);
-+
-       /*
-        * Initialize the TSS.  Don't bother initializing sp0, as the initial
-        * task never enters user mode.
-        */
--      set_tss_desc(cpu, &t->x86_tss);
-+      set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
-       load_TR_desc();
- 
-       load_mm_ldt(&init_mm);
-@@ -1671,7 +1701,6 @@ void cpu_init(void)
- 
-       fpu__init_cpu();
- 
--      setup_cpu_entry_area(cpu);
-       load_fixmap_gdt(cpu);
- }
- #endif
-diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
-index 0f4b931e1a02..c1f503673f1e 100644
---- a/arch/x86/kernel/dumpstack.c
-+++ b/arch/x86/kernel/dumpstack.c
-@@ -45,7 +45,8 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
- 
- bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
- {
--      struct tss_struct *tss = this_cpu_ptr(&cpu_tss);
-+      int cpu = smp_processor_id();
-+      struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss;
- 
-       /* Treat the canary as part of the stack for unwinding purposes. */
-       void *begin = &tss->SYSENTER_stack_canary;
-diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
-index a7c5a47beab7..d61986a36575 100644
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -2280,7 +2280,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-                * processors.  See 22.2.4.
-                */
-               vmcs_writel(HOST_TR_BASE,
--                          (unsigned long)this_cpu_ptr(&cpu_tss.x86_tss));
-+                          (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
-               vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
- 
-               /*
-diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
-index 48cd87fc7222..2a717e023c9f 100644
---- a/arch/x86/power/cpu.c
-+++ b/arch/x86/power/cpu.c
-@@ -160,18 +160,19 @@ static void do_fpu_end(void)
- static void fix_processor_context(void)
- {
-       int cpu = smp_processor_id();
--      struct tss_struct *t = &per_cpu(cpu_tss, cpu);
- #ifdef CONFIG_X86_64
-       struct desc_struct *desc = get_cpu_gdt_rw(cpu);
-       tss_desc tss;
- #endif
- 
-       /*
--       * This just modifies memory; should not be necessary. But... This is
--       * necessary, because 386 hardware has concept of busy TSS or some
--       * similar stupidity.
-+       * We need to reload TR, which requires that we change the
-+       * GDT entry to indicate "available" first.
-+       *
-+       * XXX: This could probably all be replaced by a call to
-+       * force_reload_TR().
-        */
--      set_tss_desc(cpu, &t->x86_tss);
-+      set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
- 
- #ifdef CONFIG_X86_64
-       memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
-diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
-index 0092da1c056f..41e0e103f090 100644
---- a/arch/x86/entry/entry_32.S
-+++ b/arch/x86/entry/entry_32.S
-@@ -948,7 +948,8 @@ ENTRY(debug)
-       movl    %esp, %eax                      # pt_regs pointer
- 
-       /* Are we currently on the SYSENTER stack? */
--      PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-+      movl    PER_CPU_VAR(cpu_entry_area), %ecx
-+      addl    $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
-       jb      .Ldebug_from_sysenter_stack
-@@ -991,7 +992,8 @@ ENTRY(nmi)
-       movl    %esp, %eax                      # pt_regs pointer
- 
-       /* Are we currently on the SYSENTER stack? */
--      PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-+      movl    PER_CPU_VAR(cpu_entry_area), %ecx
-+      addl    $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
-       jb      .Lnmi_from_sysenter_stack
--- 
-2.14.2
-
diff --git a/patches/kernel/0153-x86-entry-64-Separate-cpu_current_top_of_stack-from-.patch b/patches/kernel/0153-x86-entry-64-Separate-cpu_current_top_of_stack-from-.patch

deleted file mode 100644 (file)

index c1d06d2..0000000
--- a/patches/kernel/0153-x86-entry-64-Separate-cpu_current_top_of_stack-from-.patch
+++ /dev/null
@@ -1,161 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:21 +0100
-Subject: [PATCH] x86/entry/64: Separate cpu_current_top_of_stack from TSS.sp0
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-On 64-bit kernels, we used to assume that TSS.sp0 was the current
-top of stack.  With the addition of an entry trampoline, this will
-no longer be the case.  Store the current top of stack in TSS.sp1,
-which is otherwise unused but shares the same cacheline.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150606.050864668@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 9aaefe7b59ae00605256a7d6bd1c1456432495fc)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 281be4ff07f7c67dc2a9c75ab24a7b9ff25544ae)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/processor.h   | 18 +++++++++++++-----
- arch/x86/include/asm/thread_info.h |  2 +-
- arch/x86/kernel/asm-offsets_64.c   |  1 +
- arch/x86/kernel/process.c          | 10 ++++++++++
- arch/x86/kernel/process_64.c       |  1 +
- 5 files changed, 26 insertions(+), 6 deletions(-)
-
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index 55885465c3a7..1bfe4bad797a 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -303,7 +303,13 @@ struct x86_hw_tss {
- struct x86_hw_tss {
-       u32                     reserved1;
-       u64                     sp0;
-+
-+      /*
-+       * We store cpu_current_top_of_stack in sp1 so it's always accessible.
-+       * Linux does not use ring 1, so sp1 is not otherwise needed.
-+       */
-       u64                     sp1;
-+
-       u64                     sp2;
-       u64                     reserved2;
-       u64                     ist[7];
-@@ -362,6 +368,8 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss);
- 
- #ifdef CONFIG_X86_32
- DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
-+#else
-+#define cpu_current_top_of_stack cpu_tss.x86_tss.sp1
- #endif
- 
- /*
-@@ -533,12 +541,12 @@ static inline void native_swapgs(void)
- 
- static inline unsigned long current_top_of_stack(void)
- {
--#ifdef CONFIG_X86_64
--      return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
--#else
--      /* sp0 on x86_32 is special in and around vm86 mode. */
-+      /*
-+       *  We can't read directly from tss.sp0: sp0 on x86_32 is special in
-+       *  and around vm86 mode and sp0 on x86_64 is special because of the
-+       *  entry trampoline.
-+       */
-       return this_cpu_read_stable(cpu_current_top_of_stack);
--#endif
- }
- 
- static inline bool on_thread_stack(void)
-diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
-index ec8ef3bbb7dc..760dd8a73927 100644
---- a/arch/x86/include/asm/thread_info.h
-+++ b/arch/x86/include/asm/thread_info.h
-@@ -214,7 +214,7 @@ static inline int arch_within_stack_frames(const void * const stack,
- #else /* !__ASSEMBLY__ */
- 
- #ifdef CONFIG_X86_64
--# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
-+# define cpu_current_top_of_stack (cpu_tss + TSS_sp1)
- #endif
- 
- #endif
-diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
-index c21a5315b38e..048f68ff3396 100644
---- a/arch/x86/kernel/asm-offsets_64.c
-+++ b/arch/x86/kernel/asm-offsets_64.c
-@@ -65,6 +65,7 @@ int main(void)
- 
-       OFFSET(TSS_ist, tss_struct, x86_tss.ist);
-       OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
-+      OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
-       BLANK();
- 
- #ifdef CONFIG_CC_STACKPROTECTOR
-diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
-index aa86e810fb54..407fc37a8718 100644
---- a/arch/x86/kernel/process.c
-+++ b/arch/x86/kernel/process.c
-@@ -55,6 +55,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
-                * Poison it.
-                */
-               .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
-+
-+#ifdef CONFIG_X86_64
-+              /*
-+               * .sp1 is cpu_current_top_of_stack.  The init task never
-+               * runs user code, but cpu_current_top_of_stack should still
-+               * be well defined before the first context switch.
-+               */
-+              .sp1 = TOP_OF_INIT_STACK,
-+#endif
-+
- #ifdef CONFIG_X86_32
-               .ss0 = __KERNEL_DS,
-               .ss1 = __KERNEL_CS,
-diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
-index 01b119bebb68..157f81816915 100644
---- a/arch/x86/kernel/process_64.c
-+++ b/arch/x86/kernel/process_64.c
-@@ -461,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
-        * Switch the PDA and FPU contexts.
-        */
-       this_cpu_write(current_task, next_p);
-+      this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
- 
-       /* Reload sp0. */
-       update_sp0(next_p);
--- 
-2.14.2
-
diff --git a/patches/kernel/0153-x86-entry-Remap-the-TSS-into-the-CPU-entry-area.patch b/patches/kernel/0153-x86-entry-Remap-the-TSS-into-the-CPU-entry-area.patch

new file mode 100644 (file)

index 0000000..b42d76d
--- /dev/null
+++ b/patches/kernel/0153-x86-entry-Remap-the-TSS-into-the-CPU-entry-area.patch
@@ -0,0 +1,286 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:20 +0100
+Subject: [PATCH] x86/entry: Remap the TSS into the CPU entry area
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This has a secondary purpose: it puts the entry stack into a region
+with a well-controlled layout.  A subsequent patch will take
+advantage of this to streamline the SYSCALL entry code to be able to
+find it more easily.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bpetkov@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150605.962042855@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 72f5e08dbba2d01aa90b592cf76c378ea233b00b)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 475b37e78defbc4cb91d54e2bcf18aa75611bb3a)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/fixmap.h |  7 +++++++
+ arch/x86/kernel/asm-offsets.c |  3 +++
+ arch/x86/kernel/cpu/common.c  | 41 +++++++++++++++++++++++++++++++++++------
+ arch/x86/kernel/dumpstack.c   |  3 ++-
+ arch/x86/kvm/vmx.c            |  2 +-
+ arch/x86/power/cpu.c          | 11 ++++++-----
+ arch/x86/entry/entry_32.S     |  6 ++++--
+ 7 files changed, 58 insertions(+), 15 deletions(-)
+
+diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
+index 8c6ed66fe957..c92fc30e6def 100644
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -54,6 +54,13 @@ extern unsigned long __FIXADDR_TOP;
+  */
+ struct cpu_entry_area {
+       char gdt[PAGE_SIZE];
++
++      /*
++       * The GDT is just below cpu_tss and thus serves (on x86_64) as a
++       * a read-only guard page for the SYSENTER stack at the bottom
++       * of the TSS region.
++       */
++      struct tss_struct tss;
+ };
+ 
+ #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
+diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
+index 031bd35bd911..f765c3253ec3 100644
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -97,4 +97,7 @@ void common(void) {
+       OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
+       /* Size of SYSENTER_stack */
+       DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
++
++      /* Layout info for cpu_entry_area */
++      OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+ }
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index e61eff11f562..4a38de4c6ede 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -466,6 +466,22 @@ void load_percpu_segment(int cpu)
+       load_stack_canary_segment();
+ }
+ 
++static void set_percpu_fixmap_pages(int fixmap_index, void *ptr,
++                                  int pages, pgprot_t prot)
++{
++      int i;
++
++      for (i = 0; i < pages; i++) {
++              __set_fixmap(fixmap_index - i,
++                           per_cpu_ptr_to_phys(ptr + i * PAGE_SIZE), prot);
++      }
++}
++
++#ifdef CONFIG_X86_32
++/* The 32-bit entry code needs to find cpu_entry_area. */
++DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
++#endif
++
+ /* Setup the fixmap mappings only once per-processor */
+ static inline void setup_cpu_entry_area(int cpu)
+ {
+@@ -507,7 +523,15 @@ static inline void setup_cpu_entry_area(int cpu)
+        */
+       BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+                     offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
++      BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
++      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
++                              &per_cpu(cpu_tss, cpu),
++                              sizeof(struct tss_struct) / PAGE_SIZE,
++                              PAGE_KERNEL);
+ 
++#ifdef CONFIG_X86_32
++      this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu));
++#endif
+ }
+ 
+ /* Load the original GDT from the per-cpu structure */
+@@ -1249,7 +1273,8 @@ void enable_sep_cpu(void)
+       wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
+ 
+       wrmsr(MSR_IA32_SYSENTER_ESP,
+-            (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
++            (unsigned long)&get_cpu_entry_area(cpu)->tss +
++            offsetofend(struct tss_struct, SYSENTER_stack),
+             0);
+ 
+       wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
+@@ -1371,6 +1396,8 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+ /* May not be marked __init: used by software suspend */
+ void syscall_init(void)
+ {
++      int cpu = smp_processor_id();
++
+       wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
+       wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+ 
+@@ -1384,7 +1411,7 @@ void syscall_init(void)
+        */
+       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+       wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
+-                  (unsigned long)this_cpu_ptr(&cpu_tss) +
++                  (unsigned long)&get_cpu_entry_area(cpu)->tss +
+                   offsetofend(struct tss_struct, SYSENTER_stack));
+       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
+ #else
+@@ -1593,11 +1620,13 @@ void cpu_init(void)
+       BUG_ON(me->mm);
+       enter_lazy_tlb(&init_mm, me);
+ 
++      setup_cpu_entry_area(cpu);
++
+       /*
+        * Initialize the TSS.  Don't bother initializing sp0, as the initial
+        * task never enters user mode.
+        */
+-      set_tss_desc(cpu, &t->x86_tss);
++      set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+       load_TR_desc();
+ 
+       load_mm_ldt(&init_mm);
+@@ -1610,7 +1639,6 @@ void cpu_init(void)
+       if (is_uv_system())
+               uv_cpu_init();
+ 
+-      setup_cpu_entry_area(cpu);
+       load_fixmap_gdt(cpu);
+ }
+ 
+@@ -1650,11 +1678,13 @@ void cpu_init(void)
+       BUG_ON(curr->mm);
+       enter_lazy_tlb(&init_mm, curr);
+ 
++      setup_cpu_entry_area(cpu);
++
+       /*
+        * Initialize the TSS.  Don't bother initializing sp0, as the initial
+        * task never enters user mode.
+        */
+-      set_tss_desc(cpu, &t->x86_tss);
++      set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+       load_TR_desc();
+ 
+       load_mm_ldt(&init_mm);
+@@ -1671,7 +1701,6 @@ void cpu_init(void)
+ 
+       fpu__init_cpu();
+ 
+-      setup_cpu_entry_area(cpu);
+       load_fixmap_gdt(cpu);
+ }
+ #endif
+diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
+index 0f4b931e1a02..c1f503673f1e 100644
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -45,7 +45,8 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
+ 
+ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
+ {
+-      struct tss_struct *tss = this_cpu_ptr(&cpu_tss);
++      int cpu = smp_processor_id();
++      struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss;
+ 
+       /* Treat the canary as part of the stack for unwinding purposes. */
+       void *begin = &tss->SYSENTER_stack_canary;
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index a7c5a47beab7..d61986a36575 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2280,7 +2280,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+                * processors.  See 22.2.4.
+                */
+               vmcs_writel(HOST_TR_BASE,
+-                          (unsigned long)this_cpu_ptr(&cpu_tss.x86_tss));
++                          (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
+               vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
+ 
+               /*
+diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
+index 48cd87fc7222..2a717e023c9f 100644
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -160,18 +160,19 @@ static void do_fpu_end(void)
+ static void fix_processor_context(void)
+ {
+       int cpu = smp_processor_id();
+-      struct tss_struct *t = &per_cpu(cpu_tss, cpu);
+ #ifdef CONFIG_X86_64
+       struct desc_struct *desc = get_cpu_gdt_rw(cpu);
+       tss_desc tss;
+ #endif
+ 
+       /*
+-       * This just modifies memory; should not be necessary. But... This is
+-       * necessary, because 386 hardware has concept of busy TSS or some
+-       * similar stupidity.
++       * We need to reload TR, which requires that we change the
++       * GDT entry to indicate "available" first.
++       *
++       * XXX: This could probably all be replaced by a call to
++       * force_reload_TR().
+        */
+-      set_tss_desc(cpu, &t->x86_tss);
++      set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+ 
+ #ifdef CONFIG_X86_64
+       memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
+diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
+index 0092da1c056f..41e0e103f090 100644
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -948,7 +948,8 @@ ENTRY(debug)
+       movl    %esp, %eax                      # pt_regs pointer
+ 
+       /* Are we currently on the SYSENTER stack? */
+-      PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
++      movl    PER_CPU_VAR(cpu_entry_area), %ecx
++      addl    $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       jb      .Ldebug_from_sysenter_stack
+@@ -991,7 +992,8 @@ ENTRY(nmi)
+       movl    %esp, %eax                      # pt_regs pointer
+ 
+       /* Are we currently on the SYSENTER stack? */
+-      PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
++      movl    PER_CPU_VAR(cpu_entry_area), %ecx
++      addl    $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       jb      .Lnmi_from_sysenter_stack
+-- 
+2.14.2
+
diff --git a/patches/kernel/0154-x86-entry-64-Separate-cpu_current_top_of_stack-from-.patch b/patches/kernel/0154-x86-entry-64-Separate-cpu_current_top_of_stack-from-.patch

new file mode 100644 (file)

index 0000000..c1d06d2
--- /dev/null
+++ b/patches/kernel/0154-x86-entry-64-Separate-cpu_current_top_of_stack-from-.patch
@@ -0,0 +1,161 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:21 +0100
+Subject: [PATCH] x86/entry/64: Separate cpu_current_top_of_stack from TSS.sp0
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+On 64-bit kernels, we used to assume that TSS.sp0 was the current
+top of stack.  With the addition of an entry trampoline, this will
+no longer be the case.  Store the current top of stack in TSS.sp1,
+which is otherwise unused but shares the same cacheline.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.050864668@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 9aaefe7b59ae00605256a7d6bd1c1456432495fc)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 281be4ff07f7c67dc2a9c75ab24a7b9ff25544ae)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/processor.h   | 18 +++++++++++++-----
+ arch/x86/include/asm/thread_info.h |  2 +-
+ arch/x86/kernel/asm-offsets_64.c   |  1 +
+ arch/x86/kernel/process.c          | 10 ++++++++++
+ arch/x86/kernel/process_64.c       |  1 +
+ 5 files changed, 26 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 55885465c3a7..1bfe4bad797a 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -303,7 +303,13 @@ struct x86_hw_tss {
+ struct x86_hw_tss {
+       u32                     reserved1;
+       u64                     sp0;
++
++      /*
++       * We store cpu_current_top_of_stack in sp1 so it's always accessible.
++       * Linux does not use ring 1, so sp1 is not otherwise needed.
++       */
+       u64                     sp1;
++
+       u64                     sp2;
+       u64                     reserved2;
+       u64                     ist[7];
+@@ -362,6 +368,8 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss);
+ 
+ #ifdef CONFIG_X86_32
+ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
++#else
++#define cpu_current_top_of_stack cpu_tss.x86_tss.sp1
+ #endif
+ 
+ /*
+@@ -533,12 +541,12 @@ static inline void native_swapgs(void)
+ 
+ static inline unsigned long current_top_of_stack(void)
+ {
+-#ifdef CONFIG_X86_64
+-      return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
+-#else
+-      /* sp0 on x86_32 is special in and around vm86 mode. */
++      /*
++       *  We can't read directly from tss.sp0: sp0 on x86_32 is special in
++       *  and around vm86 mode and sp0 on x86_64 is special because of the
++       *  entry trampoline.
++       */
+       return this_cpu_read_stable(cpu_current_top_of_stack);
+-#endif
+ }
+ 
+ static inline bool on_thread_stack(void)
+diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
+index ec8ef3bbb7dc..760dd8a73927 100644
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -214,7 +214,7 @@ static inline int arch_within_stack_frames(const void * const stack,
+ #else /* !__ASSEMBLY__ */
+ 
+ #ifdef CONFIG_X86_64
+-# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
++# define cpu_current_top_of_stack (cpu_tss + TSS_sp1)
+ #endif
+ 
+ #endif
+diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
+index c21a5315b38e..048f68ff3396 100644
+--- a/arch/x86/kernel/asm-offsets_64.c
++++ b/arch/x86/kernel/asm-offsets_64.c
+@@ -65,6 +65,7 @@ int main(void)
+ 
+       OFFSET(TSS_ist, tss_struct, x86_tss.ist);
+       OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
++      OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
+       BLANK();
+ 
+ #ifdef CONFIG_CC_STACKPROTECTOR
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
+index aa86e810fb54..407fc37a8718 100644
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -55,6 +55,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+                * Poison it.
+                */
+               .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
++
++#ifdef CONFIG_X86_64
++              /*
++               * .sp1 is cpu_current_top_of_stack.  The init task never
++               * runs user code, but cpu_current_top_of_stack should still
++               * be well defined before the first context switch.
++               */
++              .sp1 = TOP_OF_INIT_STACK,
++#endif
++
+ #ifdef CONFIG_X86_32
+               .ss0 = __KERNEL_DS,
+               .ss1 = __KERNEL_CS,
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index 01b119bebb68..157f81816915 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -461,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+        * Switch the PDA and FPU contexts.
+        */
+       this_cpu_write(current_task, next_p);
++      this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
+ 
+       /* Reload sp0. */
+       update_sp0(next_p);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0154-x86-espfix-64-Stop-assuming-that-pt_regs-is-on-the-e.patch b/patches/kernel/0154-x86-espfix-64-Stop-assuming-that-pt_regs-is-on-the-e.patch

deleted file mode 100644 (file)

index 8499516..0000000
--- a/patches/kernel/0154-x86-espfix-64-Stop-assuming-that-pt_regs-is-on-the-e.patch
+++ /dev/null
@@ -1,124 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:22 +0100
-Subject: [PATCH] x86/espfix/64: Stop assuming that pt_regs is on the entry
- stack
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-When we start using an entry trampoline, a #GP from userspace will
-be delivered on the entry stack, not on the task stack.  Fix the
-espfix64 #DF fixup to set up #GP according to TSS.SP0, rather than
-assuming that pt_regs + 1 == SP0.  This won't change anything
-without an entry stack, but it will make the code continue to work
-when an entry stack is added.
-
-While we're at it, improve the comments to explain what's actually
-going on.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150606.130778051@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 6d9256f0a89eaff97fca6006100bcaea8d1d8bdb)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit f5d8df279d00c22e4c338a5891a874a59947e5f5)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/traps.c | 37 ++++++++++++++++++++++++++++---------
- 1 file changed, 28 insertions(+), 9 deletions(-)
-
-diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
-index 7b1d0df624cf..b69db1ee8733 100644
---- a/arch/x86/kernel/traps.c
-+++ b/arch/x86/kernel/traps.c
-@@ -360,9 +360,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
- 
-       /*
-        * If IRET takes a non-IST fault on the espfix64 stack, then we
--       * end up promoting it to a doublefault.  In that case, modify
--       * the stack to make it look like we just entered the #GP
--       * handler from user space, similar to bad_iret.
-+       * end up promoting it to a doublefault.  In that case, take
-+       * advantage of the fact that we're not using the normal (TSS.sp0)
-+       * stack right now.  We can write a fake #GP(0) frame at TSS.sp0
-+       * and then modify our own IRET frame so that, when we return,
-+       * we land directly at the #GP(0) vector with the stack already
-+       * set up according to its expectations.
-+       *
-+       * The net result is that our #GP handler will think that we
-+       * entered from usermode with the bad user context.
-        *
-        * No need for ist_enter here because we don't use RCU.
-        */
-@@ -370,13 +376,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
-               regs->cs == __KERNEL_CS &&
-               regs->ip == (unsigned long)native_irq_return_iret)
-       {
--              struct pt_regs *normal_regs = task_pt_regs(current);
-+              struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1;
-+
-+              /*
-+               * regs->sp points to the failing IRET frame on the
-+               * ESPFIX64 stack.  Copy it to the entry stack.  This fills
-+               * in gpregs->ss through gpregs->ip.
-+               *
-+               */
-+              memmove(&gpregs->ip, (void *)regs->sp, 5*8);
-+              gpregs->orig_ax = 0;  /* Missing (lost) #GP error code */
- 
--              /* Fake a #GP(0) from userspace. */
--              memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
--              normal_regs->orig_ax = 0;  /* Missing (lost) #GP error code */
-+              /*
-+               * Adjust our frame so that we return straight to the #GP
-+               * vector with the expected RSP value.  This is safe because
-+               * we won't enable interupts or schedule before we invoke
-+               * general_protection, so nothing will clobber the stack
-+               * frame we just set up.
-+               */
-               regs->ip = (unsigned long)general_protection;
--              regs->sp = (unsigned long)&normal_regs->orig_ax;
-+              regs->sp = (unsigned long)&gpregs->orig_ax;
- 
-               return;
-       }
-@@ -401,7 +420,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
-        *
-        *   Processors update CR2 whenever a page fault is detected. If a
-        *   second page fault occurs while an earlier page fault is being
--       *   deliv- ered, the faulting linear address of the second fault will
-+       *   delivered, the faulting linear address of the second fault will
-        *   overwrite the contents of CR2 (replacing the previous
-        *   address). These updates to CR2 occur even if the page fault
-        *   results in a double fault or occurs during the delivery of a
--- 
-2.14.2
-
diff --git a/patches/kernel/0155-x86-entry-64-Use-a-per-CPU-trampoline-stack-for-IDT-.patch b/patches/kernel/0155-x86-entry-64-Use-a-per-CPU-trampoline-stack-for-IDT-.patch

deleted file mode 100644 (file)

index bfea36c..0000000
--- a/patches/kernel/0155-x86-entry-64-Use-a-per-CPU-trampoline-stack-for-IDT-.patch
+++ /dev/null
@@ -1,295 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:23 +0100
-Subject: [PATCH] x86/entry/64: Use a per-CPU trampoline stack for IDT entries
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Historically, IDT entries from usermode have always gone directly
-to the running task's kernel stack.  Rearrange it so that we enter on
-a per-CPU trampoline stack and then manually switch to the task's stack.
-This touches a couple of extra cachelines, but it gives us a chance
-to run some code before we touch the kernel stack.
-
-The asm isn't exactly beautiful, but I think that fully refactoring
-it can wait.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150606.225330557@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 7f2590a110b837af5679d08fc25c6227c5a8c497)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit bfb2d0ede023853fb8c24d3dae8974cb2f7117c3)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/switch_to.h |  4 ++-
- arch/x86/include/asm/traps.h     |  1 -
- arch/x86/kernel/cpu/common.c     |  6 ++--
- arch/x86/kernel/traps.c          | 21 +++++++------
- arch/x86/entry/entry_64.S        | 67 ++++++++++++++++++++++++++++++----------
- arch/x86/entry/entry_64_compat.S |  5 ++-
- 6 files changed, 72 insertions(+), 32 deletions(-)
-
-diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
-index 010cd6e4eafc..ca2fc84ad278 100644
---- a/arch/x86/include/asm/switch_to.h
-+++ b/arch/x86/include/asm/switch_to.h
-@@ -89,10 +89,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
- /* This is used when switching tasks or entering/exiting vm86 mode. */
- static inline void update_sp0(struct task_struct *task)
- {
-+      /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
- #ifdef CONFIG_X86_32
-       load_sp0(task->thread.sp0);
- #else
--      load_sp0(task_top_of_stack(task));
-+      if (static_cpu_has(X86_FEATURE_XENPV))
-+              load_sp0(task_top_of_stack(task));
- #endif
- }
- 
-diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
-index b052a7621ca1..c3b652672d6f 100644
---- a/arch/x86/include/asm/traps.h
-+++ b/arch/x86/include/asm/traps.h
-@@ -92,7 +92,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
- dotraplinkage void do_stack_segment(struct pt_regs *, long);
- #ifdef CONFIG_X86_64
- dotraplinkage void do_double_fault(struct pt_regs *, long);
--asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
- #endif
- dotraplinkage void do_general_protection(struct pt_regs *, long);
- dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 4a38de4c6ede..404e4b75db6e 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -1623,11 +1623,13 @@ void cpu_init(void)
-       setup_cpu_entry_area(cpu);
- 
-       /*
--       * Initialize the TSS.  Don't bother initializing sp0, as the initial
--       * task never enters user mode.
-+       * Initialize the TSS.  sp0 points to the entry trampoline stack
-+       * regardless of what task is running.
-        */
-       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
-       load_TR_desc();
-+      load_sp0((unsigned long)&get_cpu_entry_area(cpu)->tss +
-+               offsetofend(struct tss_struct, SYSENTER_stack));
- 
-       load_mm_ldt(&init_mm);
- 
-diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
-index b69db1ee8733..d9debdafe7a6 100644
---- a/arch/x86/kernel/traps.c
-+++ b/arch/x86/kernel/traps.c
-@@ -631,14 +631,15 @@ NOKPROBE_SYMBOL(do_int3);
- 
- #ifdef CONFIG_X86_64
- /*
-- * Help handler running on IST stack to switch off the IST stack if the
-- * interrupted code was in user mode. The actual stack switch is done in
-- * entry_64.S
-+ * Help handler running on a per-cpu (IST or entry trampoline) stack
-+ * to switch to the normal thread stack if the interrupted code was in
-+ * user mode. The actual stack switch is done in entry_64.S
-  */
- asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
- {
--      struct pt_regs *regs = task_pt_regs(current);
--      *regs = *eregs;
-+      struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
-+      if (regs != eregs)
-+              *regs = *eregs;
-       return regs;
- }
- NOKPROBE_SYMBOL(sync_regs);
-@@ -654,13 +655,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
-       /*
-        * This is called from entry_64.S early in handling a fault
-        * caused by a bad iret to user mode.  To handle the fault
--       * correctly, we want move our stack frame to task_pt_regs
--       * and we want to pretend that the exception came from the
--       * iret target.
-+       * correctly, we want to move our stack frame to where it would
-+       * be had we entered directly on the entry stack (rather than
-+       * just below the IRET frame) and we want to pretend that the
-+       * exception came from the IRET target.
-        */
-       struct bad_iret_stack *new_stack =
--              container_of(task_pt_regs(current),
--                           struct bad_iret_stack, regs);
-+              (struct bad_iret_stack *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1;
- 
-       /* Copy the IRET target to the new stack. */
-       memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 6c73e96daf78..f70fedc58bac 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -559,6 +559,13 @@ END(irq_entries_start)
- /* 0(%rsp): ~(interrupt number) */
-       .macro interrupt func
-       cld
-+
-+      testb   $3, CS-ORIG_RAX(%rsp)
-+      jz      1f
-+      SWAPGS
-+      call    switch_to_thread_stack
-+1:
-+
-       ALLOC_PT_GPREGS_ON_STACK
-       SAVE_C_REGS
-       SAVE_EXTRA_REGS
-@@ -568,12 +575,8 @@ END(irq_entries_start)
-       jz      1f
- 
-       /*
--       * IRQ from user mode.  Switch to kernel gsbase and inform context
--       * tracking that we're in kernel mode.
--       */
--      SWAPGS
--
--      /*
-+       * IRQ from user mode.
-+       *
-        * We need to tell lockdep that IRQs are off.  We can't do this until
-        * we fix gsbase, and we should do it before enter_from_user_mode
-        * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
-@@ -840,6 +843,32 @@ apicinterrupt IRQ_WORK_VECTOR                     irq_work_interrupt              smp_irq_work_interrupt
-  */
- #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
- 
-+/*
-+ * Switch to the thread stack.  This is called with the IRET frame and
-+ * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
-+ * space has not been allocated for them.)
-+ */
-+ENTRY(switch_to_thread_stack)
-+      UNWIND_HINT_FUNC
-+
-+      pushq   %rdi
-+      movq    %rsp, %rdi
-+      movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-+      UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
-+
-+      pushq   7*8(%rdi)               /* regs->ss */
-+      pushq   6*8(%rdi)               /* regs->rsp */
-+      pushq   5*8(%rdi)               /* regs->eflags */
-+      pushq   4*8(%rdi)               /* regs->cs */
-+      pushq   3*8(%rdi)               /* regs->ip */
-+      pushq   2*8(%rdi)               /* regs->orig_ax */
-+      pushq   8(%rdi)                 /* return address */
-+      UNWIND_HINT_FUNC
-+
-+      movq    (%rdi), %rdi
-+      ret
-+END(switch_to_thread_stack)
-+
- .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
- ENTRY(\sym)
-       UNWIND_HINT_IRET_REGS offset=\has_error_code*8
-@@ -857,11 +886,12 @@ ENTRY(\sym)
- 
-       ALLOC_PT_GPREGS_ON_STACK
- 
--      .if \paranoid
--      .if \paranoid == 1
-+      .if \paranoid < 2
-       testb   $3, CS(%rsp)                    /* If coming from userspace, switch stacks */
--      jnz     1f
-+      jnz     .Lfrom_usermode_switch_stack_\@
-       .endif
-+
-+      .if \paranoid
-       call    paranoid_entry
-       .else
-       call    error_entry
-@@ -903,20 +933,15 @@ ENTRY(\sym)
-       jmp     error_exit
-       .endif
- 
--      .if \paranoid == 1
-+      .if \paranoid < 2
-       /*
--       * Paranoid entry from userspace.  Switch stacks and treat it
-+       * Entry from userspace.  Switch stacks and treat it
-        * as a normal entry.  This means that paranoid handlers
-        * run in real process context if user_mode(regs).
-        */
--1:
-+.Lfrom_usermode_switch_stack_\@:
-       call    error_entry
- 
--
--      movq    %rsp, %rdi                      /* pt_regs pointer */
--      call    sync_regs
--      movq    %rax, %rsp                      /* switch stack */
--
-       movq    %rsp, %rdi                      /* pt_regs pointer */
- 
-       .if \has_error_code
-@@ -1177,6 +1202,14 @@ ENTRY(error_entry)
-       SWAPGS
- 
- .Lerror_entry_from_usermode_after_swapgs:
-+      /* Put us onto the real thread stack. */
-+      popq    %r12                            /* save return addr in %12 */
-+      movq    %rsp, %rdi                      /* arg0 = pt_regs pointer */
-+      call    sync_regs
-+      movq    %rax, %rsp                      /* switch stack */
-+      ENCODE_FRAME_POINTER
-+      pushq   %r12
-+
-       /*
-        * We need to tell lockdep that IRQs are off.  We can't do this until
-        * we fix gsbase, and we should do it before enter_from_user_mode
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
-index 1f76b66518ee..2270601b6218 100644
---- a/arch/x86/entry/entry_64_compat.S
-+++ b/arch/x86/entry/entry_64_compat.S
-@@ -305,8 +305,11 @@ ENTRY(entry_INT80_compat)
-        */
-       movl    %eax, %eax
- 
--      /* Construct struct pt_regs on stack (iret frame is already on stack) */
-       pushq   %rax                    /* pt_regs->orig_ax */
-+
-+      /* switch to thread stack expects orig_ax to be pushed */
-+      call    switch_to_thread_stack
-+
-       pushq   %rdi                    /* pt_regs->di */
-       pushq   %rsi                    /* pt_regs->si */
-       pushq   %rdx                    /* pt_regs->dx */
--- 
-2.14.2
-
diff --git a/patches/kernel/0155-x86-espfix-64-Stop-assuming-that-pt_regs-is-on-the-e.patch b/patches/kernel/0155-x86-espfix-64-Stop-assuming-that-pt_regs-is-on-the-e.patch

new file mode 100644 (file)

index 0000000..8499516
--- /dev/null
+++ b/patches/kernel/0155-x86-espfix-64-Stop-assuming-that-pt_regs-is-on-the-e.patch
@@ -0,0 +1,124 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:22 +0100
+Subject: [PATCH] x86/espfix/64: Stop assuming that pt_regs is on the entry
+ stack
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+When we start using an entry trampoline, a #GP from userspace will
+be delivered on the entry stack, not on the task stack.  Fix the
+espfix64 #DF fixup to set up #GP according to TSS.SP0, rather than
+assuming that pt_regs + 1 == SP0.  This won't change anything
+without an entry stack, but it will make the code continue to work
+when an entry stack is added.
+
+While we're at it, improve the comments to explain what's actually
+going on.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.130778051@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 6d9256f0a89eaff97fca6006100bcaea8d1d8bdb)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit f5d8df279d00c22e4c338a5891a874a59947e5f5)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/traps.c | 37 ++++++++++++++++++++++++++++---------
+ 1 file changed, 28 insertions(+), 9 deletions(-)
+
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index 7b1d0df624cf..b69db1ee8733 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -360,9 +360,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+ 
+       /*
+        * If IRET takes a non-IST fault on the espfix64 stack, then we
+-       * end up promoting it to a doublefault.  In that case, modify
+-       * the stack to make it look like we just entered the #GP
+-       * handler from user space, similar to bad_iret.
++       * end up promoting it to a doublefault.  In that case, take
++       * advantage of the fact that we're not using the normal (TSS.sp0)
++       * stack right now.  We can write a fake #GP(0) frame at TSS.sp0
++       * and then modify our own IRET frame so that, when we return,
++       * we land directly at the #GP(0) vector with the stack already
++       * set up according to its expectations.
++       *
++       * The net result is that our #GP handler will think that we
++       * entered from usermode with the bad user context.
+        *
+        * No need for ist_enter here because we don't use RCU.
+        */
+@@ -370,13 +376,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+               regs->cs == __KERNEL_CS &&
+               regs->ip == (unsigned long)native_irq_return_iret)
+       {
+-              struct pt_regs *normal_regs = task_pt_regs(current);
++              struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1;
++
++              /*
++               * regs->sp points to the failing IRET frame on the
++               * ESPFIX64 stack.  Copy it to the entry stack.  This fills
++               * in gpregs->ss through gpregs->ip.
++               *
++               */
++              memmove(&gpregs->ip, (void *)regs->sp, 5*8);
++              gpregs->orig_ax = 0;  /* Missing (lost) #GP error code */
+ 
+-              /* Fake a #GP(0) from userspace. */
+-              memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
+-              normal_regs->orig_ax = 0;  /* Missing (lost) #GP error code */
++              /*
++               * Adjust our frame so that we return straight to the #GP
++               * vector with the expected RSP value.  This is safe because
++               * we won't enable interupts or schedule before we invoke
++               * general_protection, so nothing will clobber the stack
++               * frame we just set up.
++               */
+               regs->ip = (unsigned long)general_protection;
+-              regs->sp = (unsigned long)&normal_regs->orig_ax;
++              regs->sp = (unsigned long)&gpregs->orig_ax;
+ 
+               return;
+       }
+@@ -401,7 +420,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+        *
+        *   Processors update CR2 whenever a page fault is detected. If a
+        *   second page fault occurs while an earlier page fault is being
+-       *   deliv- ered, the faulting linear address of the second fault will
++       *   delivered, the faulting linear address of the second fault will
+        *   overwrite the contents of CR2 (replacing the previous
+        *   address). These updates to CR2 occur even if the page fault
+        *   results in a double fault or occurs during the delivery of a
+-- 
+2.14.2
+
diff --git a/patches/kernel/0156-x86-entry-64-Return-to-userspace-from-the-trampoline.patch b/patches/kernel/0156-x86-entry-64-Return-to-userspace-from-the-trampoline.patch

deleted file mode 100644 (file)

index 20025ac..0000000
--- a/patches/kernel/0156-x86-entry-64-Return-to-userspace-from-the-trampoline.patch
+++ /dev/null
@@ -1,133 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:24 +0100
-Subject: [PATCH] x86/entry/64: Return to userspace from the trampoline stack
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-By itself, this is useless.  It gives us the ability to run some final code
-before exit that cannnot run on the kernel stack.  This could include a CR3
-switch a la PAGE_TABLE_ISOLATION or some kernel stack erasing, for
-example.  (Or even weird things like *changing* which kernel stack gets
-used as an ASLR-strengthening mechanism.)
-
-The SYSRET32 path is not covered yet.  It could be in the future or
-we could just ignore it and force the slow path if needed.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150606.306546484@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 3e3b9293d392c577b62e24e4bc9982320438e749)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 40eb58584f732a2fefb5959e79e408bedeaaa43c)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S | 55 +++++++++++++++++++++++++++++++++++++++++++----
- 1 file changed, 51 insertions(+), 4 deletions(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index f70fedc58bac..4abe5b806d2a 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -325,8 +325,24 @@ syscall_return_via_sysret:
-       popq    %rsi    /* skip rcx */
-       popq    %rdx
-       popq    %rsi
-+
-+      /*
-+       * Now all regs are restored except RSP and RDI.
-+       * Save old stack pointer and switch to trampoline stack.
-+       */
-+      movq    %rsp, %rdi
-+      movq    PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
-+
-+      pushq   RSP-RDI(%rdi)   /* RSP */
-+      pushq   (%rdi)          /* RDI */
-+
-+      /*
-+       * We are on the trampoline stack.  All regs except RDI are live.
-+       * We can do future final exit work right here.
-+       */
-+
-       popq    %rdi
--      movq    RSP-ORIG_RAX(%rsp), %rsp
-+      popq    %rsp
-       USERGS_SYSRET64
- END(entry_SYSCALL_64)
- 
-@@ -629,10 +645,41 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
-       ud2
- 1:
- #endif
--      SWAPGS
-       POP_EXTRA_REGS
--      POP_C_REGS
--      addq    $8, %rsp        /* skip regs->orig_ax */
-+      popq    %r11
-+      popq    %r10
-+      popq    %r9
-+      popq    %r8
-+      popq    %rax
-+      popq    %rcx
-+      popq    %rdx
-+      popq    %rsi
-+
-+      /*
-+       * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
-+       * Save old stack pointer and switch to trampoline stack.
-+       */
-+      movq    %rsp, %rdi
-+      movq    PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
-+
-+      /* Copy the IRET frame to the trampoline stack. */
-+      pushq   6*8(%rdi)       /* SS */
-+      pushq   5*8(%rdi)       /* RSP */
-+      pushq   4*8(%rdi)       /* EFLAGS */
-+      pushq   3*8(%rdi)       /* CS */
-+      pushq   2*8(%rdi)       /* RIP */
-+
-+      /* Push user RDI on the trampoline stack. */
-+      pushq   (%rdi)
-+
-+      /*
-+       * We are on the trampoline stack.  All regs except RDI are live.
-+       * We can do future final exit work right here.
-+       */
-+
-+      /* Restore RDI. */
-+      popq    %rdi
-+      SWAPGS
-       INTERRUPT_RETURN
- 
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0156-x86-entry-64-Use-a-per-CPU-trampoline-stack-for-IDT-.patch b/patches/kernel/0156-x86-entry-64-Use-a-per-CPU-trampoline-stack-for-IDT-.patch

new file mode 100644 (file)

index 0000000..bfea36c
--- /dev/null
+++ b/patches/kernel/0156-x86-entry-64-Use-a-per-CPU-trampoline-stack-for-IDT-.patch
@@ -0,0 +1,295 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:23 +0100
+Subject: [PATCH] x86/entry/64: Use a per-CPU trampoline stack for IDT entries
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Historically, IDT entries from usermode have always gone directly
+to the running task's kernel stack.  Rearrange it so that we enter on
+a per-CPU trampoline stack and then manually switch to the task's stack.
+This touches a couple of extra cachelines, but it gives us a chance
+to run some code before we touch the kernel stack.
+
+The asm isn't exactly beautiful, but I think that fully refactoring
+it can wait.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.225330557@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 7f2590a110b837af5679d08fc25c6227c5a8c497)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit bfb2d0ede023853fb8c24d3dae8974cb2f7117c3)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/switch_to.h |  4 ++-
+ arch/x86/include/asm/traps.h     |  1 -
+ arch/x86/kernel/cpu/common.c     |  6 ++--
+ arch/x86/kernel/traps.c          | 21 +++++++------
+ arch/x86/entry/entry_64.S        | 67 ++++++++++++++++++++++++++++++----------
+ arch/x86/entry/entry_64_compat.S |  5 ++-
+ 6 files changed, 72 insertions(+), 32 deletions(-)
+
+diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
+index 010cd6e4eafc..ca2fc84ad278 100644
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -89,10 +89,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
+ /* This is used when switching tasks or entering/exiting vm86 mode. */
+ static inline void update_sp0(struct task_struct *task)
+ {
++      /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
+ #ifdef CONFIG_X86_32
+       load_sp0(task->thread.sp0);
+ #else
+-      load_sp0(task_top_of_stack(task));
++      if (static_cpu_has(X86_FEATURE_XENPV))
++              load_sp0(task_top_of_stack(task));
+ #endif
+ }
+ 
+diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
+index b052a7621ca1..c3b652672d6f 100644
+--- a/arch/x86/include/asm/traps.h
++++ b/arch/x86/include/asm/traps.h
+@@ -92,7 +92,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
+ dotraplinkage void do_stack_segment(struct pt_regs *, long);
+ #ifdef CONFIG_X86_64
+ dotraplinkage void do_double_fault(struct pt_regs *, long);
+-asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
+ #endif
+ dotraplinkage void do_general_protection(struct pt_regs *, long);
+ dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 4a38de4c6ede..404e4b75db6e 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1623,11 +1623,13 @@ void cpu_init(void)
+       setup_cpu_entry_area(cpu);
+ 
+       /*
+-       * Initialize the TSS.  Don't bother initializing sp0, as the initial
+-       * task never enters user mode.
++       * Initialize the TSS.  sp0 points to the entry trampoline stack
++       * regardless of what task is running.
+        */
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+       load_TR_desc();
++      load_sp0((unsigned long)&get_cpu_entry_area(cpu)->tss +
++               offsetofend(struct tss_struct, SYSENTER_stack));
+ 
+       load_mm_ldt(&init_mm);
+ 
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index b69db1ee8733..d9debdafe7a6 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -631,14 +631,15 @@ NOKPROBE_SYMBOL(do_int3);
+ 
+ #ifdef CONFIG_X86_64
+ /*
+- * Help handler running on IST stack to switch off the IST stack if the
+- * interrupted code was in user mode. The actual stack switch is done in
+- * entry_64.S
++ * Help handler running on a per-cpu (IST or entry trampoline) stack
++ * to switch to the normal thread stack if the interrupted code was in
++ * user mode. The actual stack switch is done in entry_64.S
+  */
+ asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
+ {
+-      struct pt_regs *regs = task_pt_regs(current);
+-      *regs = *eregs;
++      struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
++      if (regs != eregs)
++              *regs = *eregs;
+       return regs;
+ }
+ NOKPROBE_SYMBOL(sync_regs);
+@@ -654,13 +655,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
+       /*
+        * This is called from entry_64.S early in handling a fault
+        * caused by a bad iret to user mode.  To handle the fault
+-       * correctly, we want move our stack frame to task_pt_regs
+-       * and we want to pretend that the exception came from the
+-       * iret target.
++       * correctly, we want to move our stack frame to where it would
++       * be had we entered directly on the entry stack (rather than
++       * just below the IRET frame) and we want to pretend that the
++       * exception came from the IRET target.
+        */
+       struct bad_iret_stack *new_stack =
+-              container_of(task_pt_regs(current),
+-                           struct bad_iret_stack, regs);
++              (struct bad_iret_stack *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1;
+ 
+       /* Copy the IRET target to the new stack. */
+       memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 6c73e96daf78..f70fedc58bac 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -559,6 +559,13 @@ END(irq_entries_start)
+ /* 0(%rsp): ~(interrupt number) */
+       .macro interrupt func
+       cld
++
++      testb   $3, CS-ORIG_RAX(%rsp)
++      jz      1f
++      SWAPGS
++      call    switch_to_thread_stack
++1:
++
+       ALLOC_PT_GPREGS_ON_STACK
+       SAVE_C_REGS
+       SAVE_EXTRA_REGS
+@@ -568,12 +575,8 @@ END(irq_entries_start)
+       jz      1f
+ 
+       /*
+-       * IRQ from user mode.  Switch to kernel gsbase and inform context
+-       * tracking that we're in kernel mode.
+-       */
+-      SWAPGS
+-
+-      /*
++       * IRQ from user mode.
++       *
+        * We need to tell lockdep that IRQs are off.  We can't do this until
+        * we fix gsbase, and we should do it before enter_from_user_mode
+        * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
+@@ -840,6 +843,32 @@ apicinterrupt IRQ_WORK_VECTOR                     irq_work_interrupt              smp_irq_work_interrupt
+  */
+ #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
+ 
++/*
++ * Switch to the thread stack.  This is called with the IRET frame and
++ * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
++ * space has not been allocated for them.)
++ */
++ENTRY(switch_to_thread_stack)
++      UNWIND_HINT_FUNC
++
++      pushq   %rdi
++      movq    %rsp, %rdi
++      movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
++      UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
++
++      pushq   7*8(%rdi)               /* regs->ss */
++      pushq   6*8(%rdi)               /* regs->rsp */
++      pushq   5*8(%rdi)               /* regs->eflags */
++      pushq   4*8(%rdi)               /* regs->cs */
++      pushq   3*8(%rdi)               /* regs->ip */
++      pushq   2*8(%rdi)               /* regs->orig_ax */
++      pushq   8(%rdi)                 /* return address */
++      UNWIND_HINT_FUNC
++
++      movq    (%rdi), %rdi
++      ret
++END(switch_to_thread_stack)
++
+ .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
+ ENTRY(\sym)
+       UNWIND_HINT_IRET_REGS offset=\has_error_code*8
+@@ -857,11 +886,12 @@ ENTRY(\sym)
+ 
+       ALLOC_PT_GPREGS_ON_STACK
+ 
+-      .if \paranoid
+-      .if \paranoid == 1
++      .if \paranoid < 2
+       testb   $3, CS(%rsp)                    /* If coming from userspace, switch stacks */
+-      jnz     1f
++      jnz     .Lfrom_usermode_switch_stack_\@
+       .endif
++
++      .if \paranoid
+       call    paranoid_entry
+       .else
+       call    error_entry
+@@ -903,20 +933,15 @@ ENTRY(\sym)
+       jmp     error_exit
+       .endif
+ 
+-      .if \paranoid == 1
++      .if \paranoid < 2
+       /*
+-       * Paranoid entry from userspace.  Switch stacks and treat it
++       * Entry from userspace.  Switch stacks and treat it
+        * as a normal entry.  This means that paranoid handlers
+        * run in real process context if user_mode(regs).
+        */
+-1:
++.Lfrom_usermode_switch_stack_\@:
+       call    error_entry
+ 
+-
+-      movq    %rsp, %rdi                      /* pt_regs pointer */
+-      call    sync_regs
+-      movq    %rax, %rsp                      /* switch stack */
+-
+       movq    %rsp, %rdi                      /* pt_regs pointer */
+ 
+       .if \has_error_code
+@@ -1177,6 +1202,14 @@ ENTRY(error_entry)
+       SWAPGS
+ 
+ .Lerror_entry_from_usermode_after_swapgs:
++      /* Put us onto the real thread stack. */
++      popq    %r12                            /* save return addr in %12 */
++      movq    %rsp, %rdi                      /* arg0 = pt_regs pointer */
++      call    sync_regs
++      movq    %rax, %rsp                      /* switch stack */
++      ENCODE_FRAME_POINTER
++      pushq   %r12
++
+       /*
+        * We need to tell lockdep that IRQs are off.  We can't do this until
+        * we fix gsbase, and we should do it before enter_from_user_mode
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index 1f76b66518ee..2270601b6218 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -305,8 +305,11 @@ ENTRY(entry_INT80_compat)
+        */
+       movl    %eax, %eax
+ 
+-      /* Construct struct pt_regs on stack (iret frame is already on stack) */
+       pushq   %rax                    /* pt_regs->orig_ax */
++
++      /* switch to thread stack expects orig_ax to be pushed */
++      call    switch_to_thread_stack
++
+       pushq   %rdi                    /* pt_regs->di */
+       pushq   %rsi                    /* pt_regs->si */
+       pushq   %rdx                    /* pt_regs->dx */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0157-x86-entry-64-Create-a-per-CPU-SYSCALL-entry-trampoli.patch b/patches/kernel/0157-x86-entry-64-Create-a-per-CPU-SYSCALL-entry-trampoli.patch

deleted file mode 100644 (file)

index 4319f10..0000000
--- a/patches/kernel/0157-x86-entry-64-Create-a-per-CPU-SYSCALL-entry-trampoli.patch
+++ /dev/null
@@ -1,241 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:25 +0100
-Subject: [PATCH] x86/entry/64: Create a per-CPU SYSCALL entry trampoline
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Handling SYSCALL is tricky: the SYSCALL handler is entered with every
-single register (except FLAGS), including RSP, live.  It somehow needs
-to set RSP to point to a valid stack, which means it needs to save the
-user RSP somewhere and find its own stack pointer.  The canonical way
-to do this is with SWAPGS, which lets us access percpu data using the
-%gs prefix.
-
-With PAGE_TABLE_ISOLATION-like pagetable switching, this is
-problematic.  Without a scratch register, switching CR3 is impossible, so
-%gs-based percpu memory would need to be mapped in the user pagetables.
-Doing that without information leaks is difficult or impossible.
-
-Instead, use a different sneaky trick.  Map a copy of the first part
-of the SYSCALL asm at a different address for each CPU.  Now RIP
-varies depending on the CPU, so we can use RIP-relative memory access
-to access percpu memory.  By putting the relevant information (one
-scratch slot and the stack address) at a constant offset relative to
-RIP, we can make SYSCALL work without relying on %gs.
-
-A nice thing about this approach is that we can easily switch it on
-and off if we want pagetable switching to be configurable.
-
-The compat variant of SYSCALL doesn't have this problem in the first
-place -- there are plenty of scratch registers, since we don't care
-about preserving r8-r15.  This patch therefore doesn't touch SYSCALL32
-at all.
-
-This patch actually seems to be a small speedup.  With this patch,
-SYSCALL touches an extra cache line and an extra virtual page, but
-the pipeline no longer stalls waiting for SWAPGS.  It seems that, at
-least in a tight loop, the latter outweights the former.
-
-Thanks to David Laight for an optimization tip.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bpetkov@suse.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150606.403607157@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 3386bc8aed825e9f1f65ce38df4b109b2019b71a)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 9fec5954d068a19bbf134da7af66db94699b03a3)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/fixmap.h |  2 ++
- arch/x86/kernel/asm-offsets.c |  1 +
- arch/x86/kernel/cpu/common.c  | 15 ++++++++++-
- arch/x86/entry/entry_64.S     | 58 +++++++++++++++++++++++++++++++++++++++++++
- arch/x86/kernel/vmlinux.lds.S |  9 +++++++
- 5 files changed, 84 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
-index c92fc30e6def..189d12d8afe0 100644
---- a/arch/x86/include/asm/fixmap.h
-+++ b/arch/x86/include/asm/fixmap.h
-@@ -61,6 +61,8 @@ struct cpu_entry_area {
-        * of the TSS region.
-        */
-       struct tss_struct tss;
-+
-+      char entry_trampoline[PAGE_SIZE];
- };
- 
- #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
-diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
-index f765c3253ec3..822be00c85ff 100644
---- a/arch/x86/kernel/asm-offsets.c
-+++ b/arch/x86/kernel/asm-offsets.c
-@@ -100,4 +100,5 @@ void common(void) {
- 
-       /* Layout info for cpu_entry_area */
-       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
-+      OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
- }
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 404e4b75db6e..c2b2ee73b8a1 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -486,6 +486,8 @@ DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
- static inline void setup_cpu_entry_area(int cpu)
- {
- #ifdef CONFIG_X86_64
-+      extern char _entry_trampoline[];
-+
-       /* On 64-bit systems, we use a read-only fixmap GDT. */
-       pgprot_t gdt_prot = PAGE_KERNEL_RO;
- #else
-@@ -532,6 +534,11 @@ static inline void setup_cpu_entry_area(int cpu)
- #ifdef CONFIG_X86_32
-       this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu));
- #endif
-+
-+#ifdef CONFIG_X86_64
-+      __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
-+                   __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
-+#endif
- }
- 
- /* Load the original GDT from the per-cpu structure */
-@@ -1396,10 +1403,16 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
- /* May not be marked __init: used by software suspend */
- void syscall_init(void)
- {
-+      extern char _entry_trampoline[];
-+      extern char entry_SYSCALL_64_trampoline[];
-+
-       int cpu = smp_processor_id();
-+      unsigned long SYSCALL64_entry_trampoline =
-+              (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
-+              (entry_SYSCALL_64_trampoline - _entry_trampoline);
- 
-       wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
--      wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
-+      wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
- 
- #ifdef CONFIG_IA32_EMULATION
-       wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 4abe5b806d2a..dc100a7052ee 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -135,6 +135,64 @@ END(native_usergs_sysret64)
-  * with them due to bugs in both AMD and Intel CPUs.
-  */
- 
-+      .pushsection .entry_trampoline, "ax"
-+
-+/*
-+ * The code in here gets remapped into cpu_entry_area's trampoline.  This means
-+ * that the assembler and linker have the wrong idea as to where this code
-+ * lives (and, in fact, it's mapped more than once, so it's not even at a
-+ * fixed address).  So we can't reference any symbols outside the entry
-+ * trampoline and expect it to work.
-+ *
-+ * Instead, we carefully abuse %rip-relative addressing.
-+ * _entry_trampoline(%rip) refers to the start of the remapped) entry
-+ * trampoline.  We can thus find cpu_entry_area with this macro:
-+ */
-+
-+#define CPU_ENTRY_AREA \
-+      _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
-+
-+/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
-+#define RSP_SCRATCH   CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + \
-+                      SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
-+
-+ENTRY(entry_SYSCALL_64_trampoline)
-+      UNWIND_HINT_EMPTY
-+      swapgs
-+
-+      /* Stash the user RSP. */
-+      movq    %rsp, RSP_SCRATCH
-+
-+      /* Load the top of the task stack into RSP */
-+      movq    CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
-+
-+      /* Start building the simulated IRET frame. */
-+      pushq   $__USER_DS                      /* pt_regs->ss */
-+      pushq   RSP_SCRATCH                     /* pt_regs->sp */
-+      pushq   %r11                            /* pt_regs->flags */
-+      pushq   $__USER_CS                      /* pt_regs->cs */
-+      pushq   %rcx                            /* pt_regs->ip */
-+
-+      /*
-+       * x86 lacks a near absolute jump, and we can't jump to the real
-+       * entry text with a relative jump.  We could push the target
-+       * address and then use retq, but this destroys the pipeline on
-+       * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
-+       * spill RDI and restore it in a second-stage trampoline.
-+       */
-+      pushq   %rdi
-+      movq    $entry_SYSCALL_64_stage2, %rdi
-+      jmp     *%rdi
-+END(entry_SYSCALL_64_trampoline)
-+
-+      .popsection
-+
-+ENTRY(entry_SYSCALL_64_stage2)
-+      UNWIND_HINT_EMPTY
-+      popq    %rdi
-+      jmp     entry_SYSCALL_64_after_hwframe
-+END(entry_SYSCALL_64_stage2)
-+
- ENTRY(entry_SYSCALL_64)
-       UNWIND_HINT_EMPTY
-       /*
-diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
-index f05f00acac89..423aa36f0150 100644
---- a/arch/x86/kernel/vmlinux.lds.S
-+++ b/arch/x86/kernel/vmlinux.lds.S
-@@ -106,6 +106,15 @@ SECTIONS
-               SOFTIRQENTRY_TEXT
-               *(.fixup)
-               *(.gnu.warning)
-+
-+#ifdef CONFIG_X86_64
-+              . = ALIGN(PAGE_SIZE);
-+              _entry_trampoline = .;
-+              *(.entry_trampoline)
-+              . = ALIGN(PAGE_SIZE);
-+              ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
-+#endif
-+
-               /* End of text section */
-               _etext = .;
-       } :text = 0x9090
--- 
-2.14.2
-
diff --git a/patches/kernel/0157-x86-entry-64-Return-to-userspace-from-the-trampoline.patch b/patches/kernel/0157-x86-entry-64-Return-to-userspace-from-the-trampoline.patch

new file mode 100644 (file)

index 0000000..20025ac
--- /dev/null
+++ b/patches/kernel/0157-x86-entry-64-Return-to-userspace-from-the-trampoline.patch
@@ -0,0 +1,133 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:24 +0100
+Subject: [PATCH] x86/entry/64: Return to userspace from the trampoline stack
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+By itself, this is useless.  It gives us the ability to run some final code
+before exit that cannot run on the kernel stack.  This could include a CR3
+switch a la PAGE_TABLE_ISOLATION or some kernel stack erasing, for
+example.  (Or even weird things like *changing* which kernel stack gets
+used as an ASLR-strengthening mechanism.)
+
+The SYSRET32 path is not covered yet.  It could be in the future or
+we could just ignore it and force the slow path if needed.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.306546484@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 3e3b9293d392c577b62e24e4bc9982320438e749)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 40eb58584f732a2fefb5959e79e408bedeaaa43c)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S | 55 +++++++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 51 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index f70fedc58bac..4abe5b806d2a 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -325,8 +325,24 @@ syscall_return_via_sysret:
+       popq    %rsi    /* skip rcx */
+       popq    %rdx
+       popq    %rsi
++
++      /*
++       * Now all regs are restored except RSP and RDI.
++       * Save old stack pointer and switch to trampoline stack.
++       */
++      movq    %rsp, %rdi
++      movq    PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
++
++      pushq   RSP-RDI(%rdi)   /* RSP */
++      pushq   (%rdi)          /* RDI */
++
++      /*
++       * We are on the trampoline stack.  All regs except RDI are live.
++       * We can do future final exit work right here.
++       */
++
+       popq    %rdi
+-      movq    RSP-ORIG_RAX(%rsp), %rsp
++      popq    %rsp
+       USERGS_SYSRET64
+ END(entry_SYSCALL_64)
+ 
+@@ -629,10 +645,41 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+       ud2
+ 1:
+ #endif
+-      SWAPGS
+       POP_EXTRA_REGS
+-      POP_C_REGS
+-      addq    $8, %rsp        /* skip regs->orig_ax */
++      popq    %r11
++      popq    %r10
++      popq    %r9
++      popq    %r8
++      popq    %rax
++      popq    %rcx
++      popq    %rdx
++      popq    %rsi
++
++      /*
++       * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
++       * Save old stack pointer and switch to trampoline stack.
++       */
++      movq    %rsp, %rdi
++      movq    PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
++
++      /* Copy the IRET frame to the trampoline stack. */
++      pushq   6*8(%rdi)       /* SS */
++      pushq   5*8(%rdi)       /* RSP */
++      pushq   4*8(%rdi)       /* EFLAGS */
++      pushq   3*8(%rdi)       /* CS */
++      pushq   2*8(%rdi)       /* RIP */
++
++      /* Push user RDI on the trampoline stack. */
++      pushq   (%rdi)
++
++      /*
++       * We are on the trampoline stack.  All regs except RDI are live.
++       * We can do future final exit work right here.
++       */
++
++      /* Restore RDI. */
++      popq    %rdi
++      SWAPGS
+       INTERRUPT_RETURN
+ 
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0158-x86-entry-64-Create-a-per-CPU-SYSCALL-entry-trampoli.patch b/patches/kernel/0158-x86-entry-64-Create-a-per-CPU-SYSCALL-entry-trampoli.patch

new file mode 100644 (file)

index 0000000..4319f10
--- /dev/null
+++ b/patches/kernel/0158-x86-entry-64-Create-a-per-CPU-SYSCALL-entry-trampoli.patch
@@ -0,0 +1,241 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:25 +0100
+Subject: [PATCH] x86/entry/64: Create a per-CPU SYSCALL entry trampoline
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Handling SYSCALL is tricky: the SYSCALL handler is entered with every
+single register (except FLAGS), including RSP, live.  It somehow needs
+to set RSP to point to a valid stack, which means it needs to save the
+user RSP somewhere and find its own stack pointer.  The canonical way
+to do this is with SWAPGS, which lets us access percpu data using the
+%gs prefix.
+
+With PAGE_TABLE_ISOLATION-like pagetable switching, this is
+problematic.  Without a scratch register, switching CR3 is impossible, so
+%gs-based percpu memory would need to be mapped in the user pagetables.
+Doing that without information leaks is difficult or impossible.
+
+Instead, use a different sneaky trick.  Map a copy of the first part
+of the SYSCALL asm at a different address for each CPU.  Now RIP
+varies depending on the CPU, so we can use RIP-relative memory access
+to access percpu memory.  By putting the relevant information (one
+scratch slot and the stack address) at a constant offset relative to
+RIP, we can make SYSCALL work without relying on %gs.
+
+A nice thing about this approach is that we can easily switch it on
+and off if we want pagetable switching to be configurable.
+
+The compat variant of SYSCALL doesn't have this problem in the first
+place -- there are plenty of scratch registers, since we don't care
+about preserving r8-r15.  This patch therefore doesn't touch SYSCALL32
+at all.
+
+This patch actually seems to be a small speedup.  With this patch,
+SYSCALL touches an extra cache line and an extra virtual page, but
+the pipeline no longer stalls waiting for SWAPGS.  It seems that, at
+least in a tight loop, the latter outweighs the former.
+
+Thanks to David Laight for an optimization tip.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bpetkov@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.403607157@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 3386bc8aed825e9f1f65ce38df4b109b2019b71a)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 9fec5954d068a19bbf134da7af66db94699b03a3)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/fixmap.h |  2 ++
+ arch/x86/kernel/asm-offsets.c |  1 +
+ arch/x86/kernel/cpu/common.c  | 15 ++++++++++-
+ arch/x86/entry/entry_64.S     | 58 +++++++++++++++++++++++++++++++++++++++++++
+ arch/x86/kernel/vmlinux.lds.S |  9 +++++++
+ 5 files changed, 84 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
+index c92fc30e6def..189d12d8afe0 100644
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -61,6 +61,8 @@ struct cpu_entry_area {
+        * of the TSS region.
+        */
+       struct tss_struct tss;
++
++      char entry_trampoline[PAGE_SIZE];
+ };
+ 
+ #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
+diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
+index f765c3253ec3..822be00c85ff 100644
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -100,4 +100,5 @@ void common(void) {
+ 
+       /* Layout info for cpu_entry_area */
+       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
++      OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
+ }
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 404e4b75db6e..c2b2ee73b8a1 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -486,6 +486,8 @@ DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+ static inline void setup_cpu_entry_area(int cpu)
+ {
+ #ifdef CONFIG_X86_64
++      extern char _entry_trampoline[];
++
+       /* On 64-bit systems, we use a read-only fixmap GDT. */
+       pgprot_t gdt_prot = PAGE_KERNEL_RO;
+ #else
+@@ -532,6 +534,11 @@ static inline void setup_cpu_entry_area(int cpu)
+ #ifdef CONFIG_X86_32
+       this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu));
+ #endif
++
++#ifdef CONFIG_X86_64
++      __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
++                   __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
++#endif
+ }
+ 
+ /* Load the original GDT from the per-cpu structure */
+@@ -1396,10 +1403,16 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+ /* May not be marked __init: used by software suspend */
+ void syscall_init(void)
+ {
++      extern char _entry_trampoline[];
++      extern char entry_SYSCALL_64_trampoline[];
++
+       int cpu = smp_processor_id();
++      unsigned long SYSCALL64_entry_trampoline =
++              (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
++              (entry_SYSCALL_64_trampoline - _entry_trampoline);
+ 
+       wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
+-      wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
++      wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
+ 
+ #ifdef CONFIG_IA32_EMULATION
+       wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 4abe5b806d2a..dc100a7052ee 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -135,6 +135,64 @@ END(native_usergs_sysret64)
+  * with them due to bugs in both AMD and Intel CPUs.
+  */
+ 
++      .pushsection .entry_trampoline, "ax"
++
++/*
++ * The code in here gets remapped into cpu_entry_area's trampoline.  This means
++ * that the assembler and linker have the wrong idea as to where this code
++ * lives (and, in fact, it's mapped more than once, so it's not even at a
++ * fixed address).  So we can't reference any symbols outside the entry
++ * trampoline and expect it to work.
++ *
++ * Instead, we carefully abuse %rip-relative addressing.
++ * _entry_trampoline(%rip) refers to the start of the (remapped) entry
++ * trampoline.  We can thus find cpu_entry_area with this macro:
++ */
++
++#define CPU_ENTRY_AREA \
++      _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
++
++/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
++#define RSP_SCRATCH   CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + \
++                      SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
++
++ENTRY(entry_SYSCALL_64_trampoline)
++      UNWIND_HINT_EMPTY
++      swapgs
++
++      /* Stash the user RSP. */
++      movq    %rsp, RSP_SCRATCH
++
++      /* Load the top of the task stack into RSP */
++      movq    CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
++
++      /* Start building the simulated IRET frame. */
++      pushq   $__USER_DS                      /* pt_regs->ss */
++      pushq   RSP_SCRATCH                     /* pt_regs->sp */
++      pushq   %r11                            /* pt_regs->flags */
++      pushq   $__USER_CS                      /* pt_regs->cs */
++      pushq   %rcx                            /* pt_regs->ip */
++
++      /*
++       * x86 lacks a near absolute jump, and we can't jump to the real
++       * entry text with a relative jump.  We could push the target
++       * address and then use retq, but this destroys the pipeline on
++       * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
++       * spill RDI and restore it in a second-stage trampoline.
++       */
++      pushq   %rdi
++      movq    $entry_SYSCALL_64_stage2, %rdi
++      jmp     *%rdi
++END(entry_SYSCALL_64_trampoline)
++
++      .popsection
++
++ENTRY(entry_SYSCALL_64_stage2)
++      UNWIND_HINT_EMPTY
++      popq    %rdi
++      jmp     entry_SYSCALL_64_after_hwframe
++END(entry_SYSCALL_64_stage2)
++
+ ENTRY(entry_SYSCALL_64)
+       UNWIND_HINT_EMPTY
+       /*
+diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
+index f05f00acac89..423aa36f0150 100644
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -106,6 +106,15 @@ SECTIONS
+               SOFTIRQENTRY_TEXT
+               *(.fixup)
+               *(.gnu.warning)
++
++#ifdef CONFIG_X86_64
++              . = ALIGN(PAGE_SIZE);
++              _entry_trampoline = .;
++              *(.entry_trampoline)
++              . = ALIGN(PAGE_SIZE);
++              ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
++#endif
++
+               /* End of text section */
+               _etext = .;
+       } :text = 0x9090
+-- 
+2.14.2
+
diff --git a/patches/kernel/0158-x86-entry-64-Move-the-IST-stacks-into-struct-cpu_ent.patch b/patches/kernel/0158-x86-entry-64-Move-the-IST-stacks-into-struct-cpu_ent.patch

deleted file mode 100644 (file)

index 762ca88..0000000
--- a/patches/kernel/0158-x86-entry-64-Move-the-IST-stacks-into-struct-cpu_ent.patch
+++ /dev/null
@@ -1,234 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:26 +0100
-Subject: [PATCH] x86/entry/64: Move the IST stacks into struct cpu_entry_area
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The IST stacks are needed when an IST exception occurs and are accessed
-before any kernel code at all runs.  Move them into struct cpu_entry_area.
-
-The IST stacks are unlike the rest of cpu_entry_area: they're used even for
-entries from kernel mode.  This means that they should be set up before we
-load the final IDT.  Move cpu_entry_area setup to trap_init() for the boot
-CPU and set it up for all possible CPUs at once in native_smp_prepare_cpus().
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150606.480598743@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit 40e7f949e0d9a33968ebde5d67f7e3a47c97742a)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 88e7277709f2e7c023e66ff9ae158aeff4cf7c8f)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/fixmap.h | 12 +++++++
- arch/x86/kernel/cpu/common.c  | 74 ++++++++++++++++++++++++-------------------
- arch/x86/kernel/traps.c       |  3 ++
- 3 files changed, 57 insertions(+), 32 deletions(-)
-
-diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
-index 189d12d8afe0..953aed54cb5e 100644
---- a/arch/x86/include/asm/fixmap.h
-+++ b/arch/x86/include/asm/fixmap.h
-@@ -63,10 +63,22 @@ struct cpu_entry_area {
-       struct tss_struct tss;
- 
-       char entry_trampoline[PAGE_SIZE];
-+
-+#ifdef CONFIG_X86_64
-+      /*
-+       * Exception stacks used for IST entries.
-+       *
-+       * In the future, this should have a separate slot for each stack
-+       * with guard pages between them.
-+       */
-+      char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
-+#endif
- };
- 
- #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
- 
-+extern void setup_cpu_entry_areas(void);
-+
- /*
-  * Here we define all the compile-time 'special' virtual
-  * addresses. The point is to have a constant address at
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index c2b2ee73b8a1..f487766855d3 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -466,24 +466,36 @@ void load_percpu_segment(int cpu)
-       load_stack_canary_segment();
- }
- 
--static void set_percpu_fixmap_pages(int fixmap_index, void *ptr,
--                                  int pages, pgprot_t prot)
--{
--      int i;
--
--      for (i = 0; i < pages; i++) {
--              __set_fixmap(fixmap_index - i,
--                           per_cpu_ptr_to_phys(ptr + i * PAGE_SIZE), prot);
--      }
--}
--
- #ifdef CONFIG_X86_32
- /* The 32-bit entry code needs to find cpu_entry_area. */
- DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
- #endif
- 
-+#ifdef CONFIG_X86_64
-+/*
-+ * Special IST stacks which the CPU switches to when it calls
-+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
-+ * limit), all of them are 4K, except the debug stack which
-+ * is 8K.
-+ */
-+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
-+        [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
-+        [DEBUG_STACK - 1]                     = DEBUG_STKSZ
-+};
-+
-+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-+      [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-+#endif
-+
-+static void __init
-+set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
-+{
-+      for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
-+              __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
-+}
-+
- /* Setup the fixmap mappings only once per-processor */
--static inline void setup_cpu_entry_area(int cpu)
-+static void __init setup_cpu_entry_area(int cpu)
- {
- #ifdef CONFIG_X86_64
-       extern char _entry_trampoline[];
-@@ -532,15 +544,31 @@ static inline void setup_cpu_entry_area(int cpu)
-                               PAGE_KERNEL);
- 
- #ifdef CONFIG_X86_32
--      this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu));
-+      per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
- #endif
- 
- #ifdef CONFIG_X86_64
-+      BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
-+      BUILD_BUG_ON(sizeof(exception_stacks) !=
-+                   sizeof(((struct cpu_entry_area *)0)->exception_stacks));
-+      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
-+                              &per_cpu(exception_stacks, cpu),
-+                              sizeof(exception_stacks) / PAGE_SIZE,
-+                              PAGE_KERNEL);
-+
-       __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
-                    __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
- #endif
- }
- 
-+void __init setup_cpu_entry_areas(void)
-+{
-+      unsigned int cpu;
-+
-+      for_each_possible_cpu(cpu)
-+              setup_cpu_entry_area(cpu);
-+}
-+
- /* Load the original GDT from the per-cpu structure */
- void load_direct_gdt(int cpu)
- {
-@@ -1386,20 +1414,6 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
- DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
- EXPORT_PER_CPU_SYMBOL(__preempt_count);
- 
--/*
-- * Special IST stacks which the CPU switches to when it calls
-- * an IST-marked descriptor entry. Up to 7 stacks (hardware
-- * limit), all of them are 4K, except the debug stack which
-- * is 8K.
-- */
--static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
--        [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
--        [DEBUG_STACK - 1]                     = DEBUG_STKSZ
--};
--
--static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
--      [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
--
- /* May not be marked __init: used by software suspend */
- void syscall_init(void)
- {
-@@ -1608,7 +1622,7 @@ void cpu_init(void)
-        * set up and load the per-CPU TSS
-        */
-       if (!oist->ist[0]) {
--              char *estacks = per_cpu(exception_stacks, cpu);
-+              char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
- 
-               for (v = 0; v < N_EXCEPTION_STACKS; v++) {
-                       estacks += exception_stack_sizes[v];
-@@ -1633,8 +1647,6 @@ void cpu_init(void)
-       BUG_ON(me->mm);
-       enter_lazy_tlb(&init_mm, me);
- 
--      setup_cpu_entry_area(cpu);
--
-       /*
-        * Initialize the TSS.  sp0 points to the entry trampoline stack
-        * regardless of what task is running.
-@@ -1693,8 +1705,6 @@ void cpu_init(void)
-       BUG_ON(curr->mm);
-       enter_lazy_tlb(&init_mm, curr);
- 
--      setup_cpu_entry_area(cpu);
--
-       /*
-        * Initialize the TSS.  Don't bother initializing sp0, as the initial
-        * task never enters user mode.
-diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
-index d9debdafe7a6..fd4d47e8672e 100644
---- a/arch/x86/kernel/traps.c
-+++ b/arch/x86/kernel/traps.c
-@@ -992,6 +992,9 @@ void __init trap_init(void)
- {
-       int i;
- 
-+      /* Init cpu_entry_area before IST entries are set up */
-+      setup_cpu_entry_areas();
-+
- #ifdef CONFIG_EISA
-       void __iomem *p = early_ioremap(0x0FFFD9, 4);
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0159-x86-entry-64-Move-the-IST-stacks-into-struct-cpu_ent.patch b/patches/kernel/0159-x86-entry-64-Move-the-IST-stacks-into-struct-cpu_ent.patch

new file mode 100644 (file)

index 0000000..762ca88
--- /dev/null
+++ b/patches/kernel/0159-x86-entry-64-Move-the-IST-stacks-into-struct-cpu_ent.patch
@@ -0,0 +1,234 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:26 +0100
+Subject: [PATCH] x86/entry/64: Move the IST stacks into struct cpu_entry_area
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The IST stacks are needed when an IST exception occurs and are accessed
+before any kernel code at all runs.  Move them into struct cpu_entry_area.
+
+The IST stacks are unlike the rest of cpu_entry_area: they're used even for
+entries from kernel mode.  This means that they should be set up before we
+load the final IDT.  Move cpu_entry_area setup to trap_init() for the boot
+CPU and set it up for all possible CPUs at once in native_smp_prepare_cpus().
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.480598743@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit 40e7f949e0d9a33968ebde5d67f7e3a47c97742a)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 88e7277709f2e7c023e66ff9ae158aeff4cf7c8f)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/fixmap.h | 12 +++++++
+ arch/x86/kernel/cpu/common.c  | 74 ++++++++++++++++++++++++-------------------
+ arch/x86/kernel/traps.c       |  3 ++
+ 3 files changed, 57 insertions(+), 32 deletions(-)
+
+diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
+index 189d12d8afe0..953aed54cb5e 100644
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -63,10 +63,22 @@ struct cpu_entry_area {
+       struct tss_struct tss;
+ 
+       char entry_trampoline[PAGE_SIZE];
++
++#ifdef CONFIG_X86_64
++      /*
++       * Exception stacks used for IST entries.
++       *
++       * In the future, this should have a separate slot for each stack
++       * with guard pages between them.
++       */
++      char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
++#endif
+ };
+ 
+ #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
+ 
++extern void setup_cpu_entry_areas(void);
++
+ /*
+  * Here we define all the compile-time 'special' virtual
+  * addresses. The point is to have a constant address at
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index c2b2ee73b8a1..f487766855d3 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -466,24 +466,36 @@ void load_percpu_segment(int cpu)
+       load_stack_canary_segment();
+ }
+ 
+-static void set_percpu_fixmap_pages(int fixmap_index, void *ptr,
+-                                  int pages, pgprot_t prot)
+-{
+-      int i;
+-
+-      for (i = 0; i < pages; i++) {
+-              __set_fixmap(fixmap_index - i,
+-                           per_cpu_ptr_to_phys(ptr + i * PAGE_SIZE), prot);
+-      }
+-}
+-
+ #ifdef CONFIG_X86_32
+ /* The 32-bit entry code needs to find cpu_entry_area. */
+ DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+ #endif
+ 
++#ifdef CONFIG_X86_64
++/*
++ * Special IST stacks which the CPU switches to when it calls
++ * an IST-marked descriptor entry. Up to 7 stacks (hardware
++ * limit), all of them are 4K, except the debug stack which
++ * is 8K.
++ */
++static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
++        [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
++        [DEBUG_STACK - 1]                     = DEBUG_STKSZ
++};
++
++static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
++      [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
++#endif
++
++static void __init
++set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
++{
++      for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
++              __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
++}
++
+ /* Setup the fixmap mappings only once per-processor */
+-static inline void setup_cpu_entry_area(int cpu)
++static void __init setup_cpu_entry_area(int cpu)
+ {
+ #ifdef CONFIG_X86_64
+       extern char _entry_trampoline[];
+@@ -532,15 +544,31 @@ static inline void setup_cpu_entry_area(int cpu)
+                               PAGE_KERNEL);
+ 
+ #ifdef CONFIG_X86_32
+-      this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu));
++      per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+ #endif
+ 
+ #ifdef CONFIG_X86_64
++      BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
++      BUILD_BUG_ON(sizeof(exception_stacks) !=
++                   sizeof(((struct cpu_entry_area *)0)->exception_stacks));
++      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
++                              &per_cpu(exception_stacks, cpu),
++                              sizeof(exception_stacks) / PAGE_SIZE,
++                              PAGE_KERNEL);
++
+       __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
+                    __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+ #endif
+ }
+ 
++void __init setup_cpu_entry_areas(void)
++{
++      unsigned int cpu;
++
++      for_each_possible_cpu(cpu)
++              setup_cpu_entry_area(cpu);
++}
++
+ /* Load the original GDT from the per-cpu structure */
+ void load_direct_gdt(int cpu)
+ {
+@@ -1386,20 +1414,6 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
+ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+ EXPORT_PER_CPU_SYMBOL(__preempt_count);
+ 
+-/*
+- * Special IST stacks which the CPU switches to when it calls
+- * an IST-marked descriptor entry. Up to 7 stacks (hardware
+- * limit), all of them are 4K, except the debug stack which
+- * is 8K.
+- */
+-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+-        [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
+-        [DEBUG_STACK - 1]                     = DEBUG_STKSZ
+-};
+-
+-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+-      [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+-
+ /* May not be marked __init: used by software suspend */
+ void syscall_init(void)
+ {
+@@ -1608,7 +1622,7 @@ void cpu_init(void)
+        * set up and load the per-CPU TSS
+        */
+       if (!oist->ist[0]) {
+-              char *estacks = per_cpu(exception_stacks, cpu);
++              char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
+ 
+               for (v = 0; v < N_EXCEPTION_STACKS; v++) {
+                       estacks += exception_stack_sizes[v];
+@@ -1633,8 +1647,6 @@ void cpu_init(void)
+       BUG_ON(me->mm);
+       enter_lazy_tlb(&init_mm, me);
+ 
+-      setup_cpu_entry_area(cpu);
+-
+       /*
+        * Initialize the TSS.  sp0 points to the entry trampoline stack
+        * regardless of what task is running.
+@@ -1693,8 +1705,6 @@ void cpu_init(void)
+       BUG_ON(curr->mm);
+       enter_lazy_tlb(&init_mm, curr);
+ 
+-      setup_cpu_entry_area(cpu);
+-
+       /*
+        * Initialize the TSS.  Don't bother initializing sp0, as the initial
+        * task never enters user mode.
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index d9debdafe7a6..fd4d47e8672e 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -992,6 +992,9 @@ void __init trap_init(void)
+ {
+       int i;
+ 
++      /* Init cpu_entry_area before IST entries are set up */
++      setup_cpu_entry_areas();
++
+ #ifdef CONFIG_EISA
+       void __iomem *p = early_ioremap(0x0FFFD9, 4);
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0159-x86-entry-64-Remove-the-SYSENTER-stack-canary.patch b/patches/kernel/0159-x86-entry-64-Remove-the-SYSENTER-stack-canary.patch

deleted file mode 100644 (file)

index 19f11f8..0000000
--- a/patches/kernel/0159-x86-entry-64-Remove-the-SYSENTER-stack-canary.patch
+++ /dev/null
@@ -1,111 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:27 +0100
-Subject: [PATCH] x86/entry/64: Remove the SYSENTER stack canary
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Now that the SYSENTER stack has a guard page, there's no need for a canary
-to detect overflow after the fact.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150606.572577316@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 7fbbd5cbebf118a9e09f5453f686656a167c3d1c)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 8158adf795cb48be67891feacacc36d7a247afdf)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/processor.h | 1 -
- arch/x86/kernel/dumpstack.c      | 3 +--
- arch/x86/kernel/process.c        | 1 -
- arch/x86/kernel/traps.c          | 7 -------
- 4 files changed, 1 insertion(+), 11 deletions(-)
-
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index 1bfe4bad797a..4737d378d7b5 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -335,7 +335,6 @@ struct tss_struct {
-        * Space for the temporary SYSENTER stack, used for SYSENTER
-        * and the entry trampoline as well.
-        */
--      unsigned long           SYSENTER_stack_canary;
-       unsigned long           SYSENTER_stack[64];
- 
-       /*
-diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
-index c1f503673f1e..c32c6cce9dcc 100644
---- a/arch/x86/kernel/dumpstack.c
-+++ b/arch/x86/kernel/dumpstack.c
-@@ -48,8 +48,7 @@ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
-       int cpu = smp_processor_id();
-       struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss;
- 
--      /* Treat the canary as part of the stack for unwinding purposes. */
--      void *begin = &tss->SYSENTER_stack_canary;
-+      void *begin = &tss->SYSENTER_stack;
-       void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack);
- 
-       if ((void *)stack < begin || (void *)stack >= end)
-diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
-index 407fc37a8718..ec758390d24e 100644
---- a/arch/x86/kernel/process.c
-+++ b/arch/x86/kernel/process.c
-@@ -80,7 +80,6 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
-         */
-       .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
- #endif
--      .SYSENTER_stack_canary  = STACK_END_MAGIC,
- };
- EXPORT_PER_CPU_SYMBOL(cpu_tss);
- 
-diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
-index fd4d47e8672e..2818c83892b3 100644
---- a/arch/x86/kernel/traps.c
-+++ b/arch/x86/kernel/traps.c
-@@ -826,13 +826,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
-       debug_stack_usage_dec();
- 
- exit:
--      /*
--       * This is the most likely code path that involves non-trivial use
--       * of the SYSENTER stack.  Check that we haven't overrun it.
--       */
--      WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
--           "Overran or corrupted SYSENTER stack\n");
--
-       ist_exit(regs);
- }
- NOKPROBE_SYMBOL(do_debug);
--- 
-2.14.2
-
diff --git a/patches/kernel/0160-x86-entry-64-Remove-the-SYSENTER-stack-canary.patch b/patches/kernel/0160-x86-entry-64-Remove-the-SYSENTER-stack-canary.patch

new file mode 100644 (file)

index 0000000..19f11f8
--- /dev/null
+++ b/patches/kernel/0160-x86-entry-64-Remove-the-SYSENTER-stack-canary.patch
@@ -0,0 +1,111 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:27 +0100
+Subject: [PATCH] x86/entry/64: Remove the SYSENTER stack canary
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Now that the SYSENTER stack has a guard page, there's no need for a canary
+to detect overflow after the fact.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.572577316@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 7fbbd5cbebf118a9e09f5453f686656a167c3d1c)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 8158adf795cb48be67891feacacc36d7a247afdf)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/processor.h | 1 -
+ arch/x86/kernel/dumpstack.c      | 3 +--
+ arch/x86/kernel/process.c        | 1 -
+ arch/x86/kernel/traps.c          | 7 -------
+ 4 files changed, 1 insertion(+), 11 deletions(-)
+
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 1bfe4bad797a..4737d378d7b5 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -335,7 +335,6 @@ struct tss_struct {
+        * Space for the temporary SYSENTER stack, used for SYSENTER
+        * and the entry trampoline as well.
+        */
+-      unsigned long           SYSENTER_stack_canary;
+       unsigned long           SYSENTER_stack[64];
+ 
+       /*
+diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
+index c1f503673f1e..c32c6cce9dcc 100644
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -48,8 +48,7 @@ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
+       int cpu = smp_processor_id();
+       struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss;
+ 
+-      /* Treat the canary as part of the stack for unwinding purposes. */
+-      void *begin = &tss->SYSENTER_stack_canary;
++      void *begin = &tss->SYSENTER_stack;
+       void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack);
+ 
+       if ((void *)stack < begin || (void *)stack >= end)
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
+index 407fc37a8718..ec758390d24e 100644
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -80,7 +80,6 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+         */
+       .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
+ #endif
+-      .SYSENTER_stack_canary  = STACK_END_MAGIC,
+ };
+ EXPORT_PER_CPU_SYMBOL(cpu_tss);
+ 
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index fd4d47e8672e..2818c83892b3 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -826,13 +826,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
+       debug_stack_usage_dec();
+ 
+ exit:
+-      /*
+-       * This is the most likely code path that involves non-trivial use
+-       * of the SYSENTER stack.  Check that we haven't overrun it.
+-       */
+-      WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
+-           "Overran or corrupted SYSENTER stack\n");
+-
+       ist_exit(regs);
+ }
+ NOKPROBE_SYMBOL(do_debug);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0160-x86-entry-Clean-up-the-SYSENTER_stack-code.patch b/patches/kernel/0160-x86-entry-Clean-up-the-SYSENTER_stack-code.patch

deleted file mode 100644 (file)

index e8b5e85..0000000
--- a/patches/kernel/0160-x86-entry-Clean-up-the-SYSENTER_stack-code.patch
+++ /dev/null
@@ -1,205 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:28 +0100
-Subject: [PATCH] x86/entry: Clean up the SYSENTER_stack code
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The existing code was a mess, mainly because C arrays are nasty.  Turn
-SYSENTER_stack into a struct, add a helper to find it, and do all the
-obvious cleanups this enables.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bpetkov@suse.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150606.653244723@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 0f9a48100fba3f189724ae88a450c2261bf91c80)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit a308af33c794110c52427ad11d3a6d35ffc14b76)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/fixmap.h    |  5 +++++
- arch/x86/include/asm/processor.h |  6 +++++-
- arch/x86/kernel/asm-offsets.c    |  6 ++----
- arch/x86/kernel/cpu/common.c     | 14 +++-----------
- arch/x86/kernel/dumpstack.c      |  7 +++----
- arch/x86/entry/entry_32.S        |  4 ++--
- arch/x86/entry/entry_64.S        |  2 +-
- 7 files changed, 21 insertions(+), 23 deletions(-)
-
-diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
-index 953aed54cb5e..56aaffbbffd6 100644
---- a/arch/x86/include/asm/fixmap.h
-+++ b/arch/x86/include/asm/fixmap.h
-@@ -225,5 +225,10 @@ static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
-       return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
- }
- 
-+static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
-+{
-+      return &get_cpu_entry_area(cpu)->tss.SYSENTER_stack;
-+}
-+
- #endif /* !__ASSEMBLY__ */
- #endif /* _ASM_X86_FIXMAP_H */
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index 4737d378d7b5..2d489a414a86 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -330,12 +330,16 @@ struct x86_hw_tss {
- #define IO_BITMAP_OFFSET              (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
- #define INVALID_IO_BITMAP_OFFSET      0x8000
- 
-+struct SYSENTER_stack {
-+      unsigned long           words[64];
-+};
-+
- struct tss_struct {
-       /*
-        * Space for the temporary SYSENTER stack, used for SYSENTER
-        * and the entry trampoline as well.
-        */
--      unsigned long           SYSENTER_stack[64];
-+      struct SYSENTER_stack   SYSENTER_stack;
- 
-       /*
-        * The fixed hardware portion.  This must not cross a page boundary
-diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
-index 822be00c85ff..00ea20bfa857 100644
---- a/arch/x86/kernel/asm-offsets.c
-+++ b/arch/x86/kernel/asm-offsets.c
-@@ -93,10 +93,8 @@ void common(void) {
-       BLANK();
-       DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
- 
--      /* Offset from cpu_tss to SYSENTER_stack */
--      OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
--      /* Size of SYSENTER_stack */
--      DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
-+      OFFSET(TSS_STRUCT_SYSENTER_stack, tss_struct, SYSENTER_stack);
-+      DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
- 
-       /* Layout info for cpu_entry_area */
-       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index f487766855d3..f9541c48c290 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -1306,12 +1306,7 @@ void enable_sep_cpu(void)
- 
-       tss->x86_tss.ss1 = __KERNEL_CS;
-       wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
--
--      wrmsr(MSR_IA32_SYSENTER_ESP,
--            (unsigned long)&get_cpu_entry_area(cpu)->tss +
--            offsetofend(struct tss_struct, SYSENTER_stack),
--            0);
--
-+      wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
-       wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
- 
-       put_cpu();
-@@ -1437,9 +1432,7 @@ void syscall_init(void)
-        * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
-        */
-       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
--      wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
--                  (unsigned long)&get_cpu_entry_area(cpu)->tss +
--                  offsetofend(struct tss_struct, SYSENTER_stack));
-+      wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
-       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
- #else
-       wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
-@@ -1653,8 +1646,7 @@ void cpu_init(void)
-        */
-       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
-       load_TR_desc();
--      load_sp0((unsigned long)&get_cpu_entry_area(cpu)->tss +
--               offsetofend(struct tss_struct, SYSENTER_stack));
-+      load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
- 
-       load_mm_ldt(&init_mm);
- 
-diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
-index c32c6cce9dcc..b005e5ef6738 100644
---- a/arch/x86/kernel/dumpstack.c
-+++ b/arch/x86/kernel/dumpstack.c
-@@ -45,11 +45,10 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
- 
- bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
- {
--      int cpu = smp_processor_id();
--      struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss;
-+      struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());
- 
--      void *begin = &tss->SYSENTER_stack;
--      void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack);
-+      void *begin = ss;
-+      void *end = ss + 1;
- 
-       if ((void *)stack < begin || (void *)stack >= end)
-               return false;
-diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
-index 41e0e103f090..04abcd3f8e2d 100644
---- a/arch/x86/entry/entry_32.S
-+++ b/arch/x86/entry/entry_32.S
-@@ -949,7 +949,7 @@ ENTRY(debug)
- 
-       /* Are we currently on the SYSENTER stack? */
-       movl    PER_CPU_VAR(cpu_entry_area), %ecx
--      addl    $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
-+      addl    $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
-       jb      .Ldebug_from_sysenter_stack
-@@ -993,7 +993,7 @@ ENTRY(nmi)
- 
-       /* Are we currently on the SYSENTER stack? */
-       movl    PER_CPU_VAR(cpu_entry_area), %ecx
--      addl    $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
-+      addl    $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
-       jb      .Lnmi_from_sysenter_stack
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index dc100a7052ee..7a5e9edcdaf4 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -153,7 +153,7 @@ END(native_usergs_sysret64)
-       _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
- 
- /* The top word of the SYSENTER stack is hot and is usable as scratch space. */
--#define RSP_SCRATCH   CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + \
-+#define RSP_SCRATCH   CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + \
-                       SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
- 
- ENTRY(entry_SYSCALL_64_trampoline)
--- 
-2.14.2
-
diff --git a/patches/kernel/0161-x86-entry-64-Make-cpu_entry_area.tss-read-only.patch b/patches/kernel/0161-x86-entry-64-Make-cpu_entry_area.tss-read-only.patch

deleted file mode 100644 (file)

index 42ae5cd..0000000
--- a/patches/kernel/0161-x86-entry-64-Make-cpu_entry_area.tss-read-only.patch
+++ /dev/null
@@ -1,492 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:29 +0100
-Subject: [PATCH] x86/entry/64: Make cpu_entry_area.tss read-only
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The TSS is a fairly juicy target for exploits, and, now that the TSS
-is in the cpu_entry_area, it's no longer protected by kASLR.  Make it
-read-only on x86_64.
-
-On x86_32, it can't be RO because it's written by the CPU during task
-switches, and we use a task gate for double faults.  I'd also be
-nervous about errata if we tried to make it RO even on configurations
-without double fault handling.
-
-[ tglx: AMD confirmed that there is no problem on 64-bit with TSS RO.  So
-       it's probably safe to assume that it's a non issue, though Intel
-       might have been creative in that area. Still waiting for
-       confirmation. ]
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bpetkov@suse.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Kees Cook <keescook@chromium.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150606.733700132@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit c482feefe1aeb150156248ba0fd3e029bc886605)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 785be108f90cd62eab2da17490714085ef752538)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/fixmap.h      | 13 +++++++++----
- arch/x86/include/asm/processor.h   | 17 ++++++++---------
- arch/x86/include/asm/switch_to.h   |  4 ++--
- arch/x86/include/asm/thread_info.h |  2 +-
- arch/x86/kernel/asm-offsets.c      |  5 ++---
- arch/x86/kernel/asm-offsets_32.c   |  4 ++--
- arch/x86/kernel/cpu/common.c       | 29 +++++++++++++++++++----------
- arch/x86/kernel/ioport.c           |  2 +-
- arch/x86/kernel/process.c          |  6 +++---
- arch/x86/kernel/process_32.c       |  2 +-
- arch/x86/kernel/process_64.c       |  2 +-
- arch/x86/kernel/traps.c            |  4 ++--
- arch/x86/lib/delay.c               |  4 ++--
- arch/x86/xen/enlighten_pv.c        |  2 +-
- arch/x86/entry/entry_32.S          |  4 ++--
- arch/x86/entry/entry_64.S          |  8 ++++----
- 16 files changed, 60 insertions(+), 48 deletions(-)
-
-diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
-index 56aaffbbffd6..5dc269ff4085 100644
---- a/arch/x86/include/asm/fixmap.h
-+++ b/arch/x86/include/asm/fixmap.h
-@@ -56,9 +56,14 @@ struct cpu_entry_area {
-       char gdt[PAGE_SIZE];
- 
-       /*
--       * The GDT is just below cpu_tss and thus serves (on x86_64) as a
--       * a read-only guard page for the SYSENTER stack at the bottom
--       * of the TSS region.
-+       * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as
-+       * a a read-only guard page.
-+       */
-+      struct SYSENTER_stack_page SYSENTER_stack_page;
-+
-+      /*
-+       * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
-+       * we need task switches to work, and task switches write to the TSS.
-        */
-       struct tss_struct tss;
- 
-@@ -227,7 +232,7 @@ static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
- 
- static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
- {
--      return &get_cpu_entry_area(cpu)->tss.SYSENTER_stack;
-+      return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack;
- }
- 
- #endif /* !__ASSEMBLY__ */
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index 2d489a414a86..bccec7ed1676 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -334,13 +334,11 @@ struct SYSENTER_stack {
-       unsigned long           words[64];
- };
- 
--struct tss_struct {
--      /*
--       * Space for the temporary SYSENTER stack, used for SYSENTER
--       * and the entry trampoline as well.
--       */
--      struct SYSENTER_stack   SYSENTER_stack;
-+struct SYSENTER_stack_page {
-+      struct SYSENTER_stack stack;
-+} __aligned(PAGE_SIZE);
- 
-+struct tss_struct {
-       /*
-        * The fixed hardware portion.  This must not cross a page boundary
-        * at risk of violating the SDM's advice and potentially triggering
-@@ -357,7 +355,7 @@ struct tss_struct {
-       unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
- } __aligned(PAGE_SIZE);
- 
--DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss);
-+DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
- 
- /*
-  * sizeof(unsigned long) coming from an extra "long" at the end
-@@ -372,7 +370,8 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss);
- #ifdef CONFIG_X86_32
- DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
- #else
--#define cpu_current_top_of_stack cpu_tss.x86_tss.sp1
-+/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
-+#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
- #endif
- 
- /*
-@@ -532,7 +531,7 @@ static inline void native_set_iopl_mask(unsigned mask)
- static inline void
- native_load_sp0(unsigned long sp0)
- {
--      this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
-+      this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
- }
- 
- static inline void native_swapgs(void)
-diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
-index ca2fc84ad278..cfb6dfe4c457 100644
---- a/arch/x86/include/asm/switch_to.h
-+++ b/arch/x86/include/asm/switch_to.h
-@@ -78,10 +78,10 @@ do {                                                                       \
- static inline void refresh_sysenter_cs(struct thread_struct *thread)
- {
-       /* Only happens when SEP is enabled, no need to test "SEP"arately: */
--      if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
-+      if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
-               return;
- 
--      this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
-+      this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
-       wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
- }
- #endif
-diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
-index 760dd8a73927..6275b391ac61 100644
---- a/arch/x86/include/asm/thread_info.h
-+++ b/arch/x86/include/asm/thread_info.h
-@@ -214,7 +214,7 @@ static inline int arch_within_stack_frames(const void * const stack,
- #else /* !__ASSEMBLY__ */
- 
- #ifdef CONFIG_X86_64
--# define cpu_current_top_of_stack (cpu_tss + TSS_sp1)
-+# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
- #endif
- 
- #endif
-diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
-index 00ea20bfa857..40c3fab107ac 100644
---- a/arch/x86/kernel/asm-offsets.c
-+++ b/arch/x86/kernel/asm-offsets.c
-@@ -93,10 +93,9 @@ void common(void) {
-       BLANK();
-       DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
- 
--      OFFSET(TSS_STRUCT_SYSENTER_stack, tss_struct, SYSENTER_stack);
--      DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
--
-       /* Layout info for cpu_entry_area */
-       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
-       OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
-+      OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page);
-+      DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
- }
-diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
-index d09b161a3bd0..c4f23da7a0f0 100644
---- a/arch/x86/kernel/asm-offsets_32.c
-+++ b/arch/x86/kernel/asm-offsets_32.c
-@@ -49,8 +49,8 @@ void foo(void)
-       BLANK();
- 
-       /* Offset from the sysenter stack to tss.sp0 */
--      DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
--             offsetofend(struct tss_struct, SYSENTER_stack));
-+      DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
-+             offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack));
- 
- #ifdef CONFIG_CC_STACKPROTECTOR
-       BLANK();
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index f9541c48c290..7992e5a8076c 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -487,6 +487,9 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
- #endif
- 
-+static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
-+                                 SYSENTER_stack_storage);
-+
- static void __init
- set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
- {
-@@ -500,23 +503,29 @@ static void __init setup_cpu_entry_area(int cpu)
- #ifdef CONFIG_X86_64
-       extern char _entry_trampoline[];
- 
--      /* On 64-bit systems, we use a read-only fixmap GDT. */
-+      /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
-       pgprot_t gdt_prot = PAGE_KERNEL_RO;
-+      pgprot_t tss_prot = PAGE_KERNEL_RO;
- #else
-       /*
-        * On native 32-bit systems, the GDT cannot be read-only because
-        * our double fault handler uses a task gate, and entering through
--       * a task gate needs to change an available TSS to busy.  If the GDT
--       * is read-only, that will triple fault.
-+       * a task gate needs to change an available TSS to busy.  If the
-+       * GDT is read-only, that will triple fault.  The TSS cannot be
-+       * read-only because the CPU writes to it on task switches.
-        *
--       * On Xen PV, the GDT must be read-only because the hypervisor requires
--       * it.
-+       * On Xen PV, the GDT must be read-only because the hypervisor
-+       * requires it.
-        */
-       pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
-               PAGE_KERNEL_RO : PAGE_KERNEL;
-+      pgprot_t tss_prot = PAGE_KERNEL;
- #endif
- 
-       __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
-+      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
-+                              per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
-+                              PAGE_KERNEL);
- 
-       /*
-        * The Intel SDM says (Volume 3, 7.2.1):
-@@ -539,9 +548,9 @@ static void __init setup_cpu_entry_area(int cpu)
-                     offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
-       BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
-       set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
--                              &per_cpu(cpu_tss, cpu),
-+                              &per_cpu(cpu_tss_rw, cpu),
-                               sizeof(struct tss_struct) / PAGE_SIZE,
--                              PAGE_KERNEL);
-+                              tss_prot);
- 
- #ifdef CONFIG_X86_32
-       per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
-@@ -1297,7 +1306,7 @@ void enable_sep_cpu(void)
-               return;
- 
-       cpu = get_cpu();
--      tss = &per_cpu(cpu_tss, cpu);
-+      tss = &per_cpu(cpu_tss_rw, cpu);
- 
-       /*
-        * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
-@@ -1576,7 +1585,7 @@ void cpu_init(void)
-       if (cpu)
-               load_ucode_ap();
- 
--      t = &per_cpu(cpu_tss, cpu);
-+      t = &per_cpu(cpu_tss_rw, cpu);
-       oist = &per_cpu(orig_ist, cpu);
- 
- #ifdef CONFIG_NUMA
-@@ -1667,7 +1676,7 @@ void cpu_init(void)
- {
-       int cpu = smp_processor_id();
-       struct task_struct *curr = current;
--      struct tss_struct *t = &per_cpu(cpu_tss, cpu);
-+      struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
- 
-       wait_for_master_cpu(cpu);
- 
-diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
-index 4a613fed94b6..d13777d49d8b 100644
---- a/arch/x86/kernel/ioport.c
-+++ b/arch/x86/kernel/ioport.c
-@@ -66,7 +66,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
-        * because the ->io_bitmap_max value must match the bitmap
-        * contents:
-        */
--      tss = &per_cpu(cpu_tss, get_cpu());
-+      tss = &per_cpu(cpu_tss_rw, get_cpu());
- 
-       if (turn_on)
-               bitmap_clear(t->io_bitmap_ptr, from, num);
-diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
-index ec758390d24e..3688a7b9d055 100644
---- a/arch/x86/kernel/process.c
-+++ b/arch/x86/kernel/process.c
-@@ -46,7 +46,7 @@
-  * section. Since TSS's are completely CPU-local, we want them
-  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
-  */
--__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
-+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
-       .x86_tss = {
-               /*
-                * .sp0 is only used when entering ring 0 from a lower
-@@ -81,7 +81,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
-       .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
- #endif
- };
--EXPORT_PER_CPU_SYMBOL(cpu_tss);
-+EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
- 
- DEFINE_PER_CPU(bool, __tss_limit_invalid);
- EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
-@@ -110,7 +110,7 @@ void exit_thread(struct task_struct *tsk)
-       struct fpu *fpu = &t->fpu;
- 
-       if (bp) {
--              struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
-+              struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
- 
-               t->io_bitmap_ptr = NULL;
-               clear_thread_flag(TIF_IO_BITMAP);
-diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
-index c0d60420466c..784ff9147172 100644
---- a/arch/x86/kernel/process_32.c
-+++ b/arch/x86/kernel/process_32.c
-@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
-       struct fpu *prev_fpu = &prev->fpu;
-       struct fpu *next_fpu = &next->fpu;
-       int cpu = smp_processor_id();
--      struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
-+      struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
- 
-       /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
- 
-diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
-index 157f81816915..c75466232016 100644
---- a/arch/x86/kernel/process_64.c
-+++ b/arch/x86/kernel/process_64.c
-@@ -399,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
-       struct fpu *prev_fpu = &prev->fpu;
-       struct fpu *next_fpu = &next->fpu;
-       int cpu = smp_processor_id();
--      struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
-+      struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
- 
-       WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
-                    this_cpu_read(irq_count) != -1);
-diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
-index 2818c83892b3..14b462eefa17 100644
---- a/arch/x86/kernel/traps.c
-+++ b/arch/x86/kernel/traps.c
-@@ -376,7 +376,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
-               regs->cs == __KERNEL_CS &&
-               regs->ip == (unsigned long)native_irq_return_iret)
-       {
--              struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1;
-+              struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
- 
-               /*
-                * regs->sp points to the failing IRET frame on the
-@@ -661,7 +661,7 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
-        * exception came from the IRET target.
-        */
-       struct bad_iret_stack *new_stack =
--              (struct bad_iret_stack *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1;
-+              (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
- 
-       /* Copy the IRET target to the new stack. */
-       memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
-diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
-index 29df077cb089..cf2ac227c2ac 100644
---- a/arch/x86/lib/delay.c
-+++ b/arch/x86/lib/delay.c
-@@ -106,10 +106,10 @@ static void delay_mwaitx(unsigned long __loops)
-               delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
- 
-               /*
--               * Use cpu_tss as a cacheline-aligned, seldomly
-+               * Use cpu_tss_rw as a cacheline-aligned, seldomly
-                * accessed per-cpu variable as the monitor target.
-                */
--              __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
-+              __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
- 
-               /*
-                * AMD, like Intel, supports the EAX hint and EAX=0xf
-diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
-index 63c81154083b..3b76cf85e306 100644
---- a/arch/x86/xen/enlighten_pv.c
-+++ b/arch/x86/xen/enlighten_pv.c
-@@ -817,7 +817,7 @@ static void xen_load_sp0(unsigned long sp0)
-       mcs = xen_mc_entry(0);
-       MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
-       xen_mc_issue(PARAVIRT_LAZY_CPU);
--      this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
-+      this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
- }
- 
- void xen_set_iopl_mask(unsigned mask)
-diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
-index 04abcd3f8e2d..3ef7800007f8 100644
---- a/arch/x86/entry/entry_32.S
-+++ b/arch/x86/entry/entry_32.S
-@@ -949,7 +949,7 @@ ENTRY(debug)
- 
-       /* Are we currently on the SYSENTER stack? */
-       movl    PER_CPU_VAR(cpu_entry_area), %ecx
--      addl    $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
-+      addl    $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
-       jb      .Ldebug_from_sysenter_stack
-@@ -993,7 +993,7 @@ ENTRY(nmi)
- 
-       /* Are we currently on the SYSENTER stack? */
-       movl    PER_CPU_VAR(cpu_entry_area), %ecx
--      addl    $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
-+      addl    $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
-       jb      .Lnmi_from_sysenter_stack
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 7a5e9edcdaf4..157860b3569f 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -153,7 +153,7 @@ END(native_usergs_sysret64)
-       _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
- 
- /* The top word of the SYSENTER stack is hot and is usable as scratch space. */
--#define RSP_SCRATCH   CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + \
-+#define RSP_SCRATCH   CPU_ENTRY_AREA_SYSENTER_stack + \
-                       SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
- 
- ENTRY(entry_SYSCALL_64_trampoline)
-@@ -389,7 +389,7 @@ syscall_return_via_sysret:
-        * Save old stack pointer and switch to trampoline stack.
-        */
-       movq    %rsp, %rdi
--      movq    PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
-+      movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
- 
-       pushq   RSP-RDI(%rdi)   /* RSP */
-       pushq   (%rdi)          /* RDI */
-@@ -718,7 +718,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
-        * Save old stack pointer and switch to trampoline stack.
-        */
-       movq    %rsp, %rdi
--      movq    PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
-+      movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
- 
-       /* Copy the IRET frame to the trampoline stack. */
-       pushq   6*8(%rdi)       /* SS */
-@@ -946,7 +946,7 @@ apicinterrupt IRQ_WORK_VECTOR                      irq_work_interrupt              smp_irq_work_interrupt
- /*
-  * Exception entry points.
-  */
--#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
-+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
- 
- /*
-  * Switch to the thread stack.  This is called with the IRET frame and
--- 
-2.14.2
-
diff --git a/patches/kernel/0161-x86-entry-Clean-up-the-SYSENTER_stack-code.patch b/patches/kernel/0161-x86-entry-Clean-up-the-SYSENTER_stack-code.patch

new file mode 100644 (file)

index 0000000..e8b5e85
--- /dev/null
+++ b/patches/kernel/0161-x86-entry-Clean-up-the-SYSENTER_stack-code.patch
@@ -0,0 +1,205 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:28 +0100
+Subject: [PATCH] x86/entry: Clean up the SYSENTER_stack code
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The existing code was a mess, mainly because C arrays are nasty.  Turn
+SYSENTER_stack into a struct, add a helper to find it, and do all the
+obvious cleanups this enables.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bpetkov@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.653244723@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 0f9a48100fba3f189724ae88a450c2261bf91c80)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit a308af33c794110c52427ad11d3a6d35ffc14b76)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/fixmap.h    |  5 +++++
+ arch/x86/include/asm/processor.h |  6 +++++-
+ arch/x86/kernel/asm-offsets.c    |  6 ++----
+ arch/x86/kernel/cpu/common.c     | 14 +++-----------
+ arch/x86/kernel/dumpstack.c      |  7 +++----
+ arch/x86/entry/entry_32.S        |  4 ++--
+ arch/x86/entry/entry_64.S        |  2 +-
+ 7 files changed, 21 insertions(+), 23 deletions(-)
+
+diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
+index 953aed54cb5e..56aaffbbffd6 100644
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -225,5 +225,10 @@ static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
+       return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
+ }
+ 
++static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
++{
++      return &get_cpu_entry_area(cpu)->tss.SYSENTER_stack;
++}
++
+ #endif /* !__ASSEMBLY__ */
+ #endif /* _ASM_X86_FIXMAP_H */
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 4737d378d7b5..2d489a414a86 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -330,12 +330,16 @@ struct x86_hw_tss {
+ #define IO_BITMAP_OFFSET              (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
+ #define INVALID_IO_BITMAP_OFFSET      0x8000
+ 
++struct SYSENTER_stack {
++      unsigned long           words[64];
++};
++
+ struct tss_struct {
+       /*
+        * Space for the temporary SYSENTER stack, used for SYSENTER
+        * and the entry trampoline as well.
+        */
+-      unsigned long           SYSENTER_stack[64];
++      struct SYSENTER_stack   SYSENTER_stack;
+ 
+       /*
+        * The fixed hardware portion.  This must not cross a page boundary
+diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
+index 822be00c85ff..00ea20bfa857 100644
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -93,10 +93,8 @@ void common(void) {
+       BLANK();
+       DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+ 
+-      /* Offset from cpu_tss to SYSENTER_stack */
+-      OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
+-      /* Size of SYSENTER_stack */
+-      DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
++      OFFSET(TSS_STRUCT_SYSENTER_stack, tss_struct, SYSENTER_stack);
++      DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
+ 
+       /* Layout info for cpu_entry_area */
+       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index f487766855d3..f9541c48c290 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1306,12 +1306,7 @@ void enable_sep_cpu(void)
+ 
+       tss->x86_tss.ss1 = __KERNEL_CS;
+       wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
+-
+-      wrmsr(MSR_IA32_SYSENTER_ESP,
+-            (unsigned long)&get_cpu_entry_area(cpu)->tss +
+-            offsetofend(struct tss_struct, SYSENTER_stack),
+-            0);
+-
++      wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
+       wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
+ 
+       put_cpu();
+@@ -1437,9 +1432,7 @@ void syscall_init(void)
+        * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
+        */
+       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+-      wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
+-                  (unsigned long)&get_cpu_entry_area(cpu)->tss +
+-                  offsetofend(struct tss_struct, SYSENTER_stack));
++      wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
+       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
+ #else
+       wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
+@@ -1653,8 +1646,7 @@ void cpu_init(void)
+        */
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+       load_TR_desc();
+-      load_sp0((unsigned long)&get_cpu_entry_area(cpu)->tss +
+-               offsetofend(struct tss_struct, SYSENTER_stack));
++      load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
+ 
+       load_mm_ldt(&init_mm);
+ 
+diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
+index c32c6cce9dcc..b005e5ef6738 100644
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -45,11 +45,10 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
+ 
+ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
+ {
+-      int cpu = smp_processor_id();
+-      struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss;
++      struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());
+ 
+-      void *begin = &tss->SYSENTER_stack;
+-      void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack);
++      void *begin = ss;
++      void *end = ss + 1;
+ 
+       if ((void *)stack < begin || (void *)stack >= end)
+               return false;
+diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
+index 41e0e103f090..04abcd3f8e2d 100644
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -949,7 +949,7 @@ ENTRY(debug)
+ 
+       /* Are we currently on the SYSENTER stack? */
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+-      addl    $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
++      addl    $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       jb      .Ldebug_from_sysenter_stack
+@@ -993,7 +993,7 @@ ENTRY(nmi)
+ 
+       /* Are we currently on the SYSENTER stack? */
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+-      addl    $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
++      addl    $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       jb      .Lnmi_from_sysenter_stack
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index dc100a7052ee..7a5e9edcdaf4 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -153,7 +153,7 @@ END(native_usergs_sysret64)
+       _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+ 
+ /* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+-#define RSP_SCRATCH   CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + \
++#define RSP_SCRATCH   CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + \
+                       SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
+ 
+ ENTRY(entry_SYSCALL_64_trampoline)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0162-x86-entry-64-Make-cpu_entry_area.tss-read-only.patch b/patches/kernel/0162-x86-entry-64-Make-cpu_entry_area.tss-read-only.patch

new file mode 100644 (file)

index 0000000..42ae5cd
--- /dev/null
+++ b/patches/kernel/0162-x86-entry-64-Make-cpu_entry_area.tss-read-only.patch
@@ -0,0 +1,492 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:29 +0100
+Subject: [PATCH] x86/entry/64: Make cpu_entry_area.tss read-only
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The TSS is a fairly juicy target for exploits, and, now that the TSS
+is in the cpu_entry_area, it's no longer protected by kASLR.  Make it
+read-only on x86_64.
+
+On x86_32, it can't be RO because it's written by the CPU during task
+switches, and we use a task gate for double faults.  I'd also be
+nervous about errata if we tried to make it RO even on configurations
+without double fault handling.
+
+[ tglx: AMD confirmed that there is no problem on 64-bit with TSS RO.  So
+       it's probably safe to assume that it's a non issue, though Intel
+       might have been creative in that area. Still waiting for
+       confirmation. ]
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bpetkov@suse.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.733700132@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit c482feefe1aeb150156248ba0fd3e029bc886605)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 785be108f90cd62eab2da17490714085ef752538)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/fixmap.h      | 13 +++++++++----
+ arch/x86/include/asm/processor.h   | 17 ++++++++---------
+ arch/x86/include/asm/switch_to.h   |  4 ++--
+ arch/x86/include/asm/thread_info.h |  2 +-
+ arch/x86/kernel/asm-offsets.c      |  5 ++---
+ arch/x86/kernel/asm-offsets_32.c   |  4 ++--
+ arch/x86/kernel/cpu/common.c       | 29 +++++++++++++++++++----------
+ arch/x86/kernel/ioport.c           |  2 +-
+ arch/x86/kernel/process.c          |  6 +++---
+ arch/x86/kernel/process_32.c       |  2 +-
+ arch/x86/kernel/process_64.c       |  2 +-
+ arch/x86/kernel/traps.c            |  4 ++--
+ arch/x86/lib/delay.c               |  4 ++--
+ arch/x86/xen/enlighten_pv.c        |  2 +-
+ arch/x86/entry/entry_32.S          |  4 ++--
+ arch/x86/entry/entry_64.S          |  8 ++++----
+ 16 files changed, 60 insertions(+), 48 deletions(-)
+
+diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
+index 56aaffbbffd6..5dc269ff4085 100644
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -56,9 +56,14 @@ struct cpu_entry_area {
+       char gdt[PAGE_SIZE];
+ 
+       /*
+-       * The GDT is just below cpu_tss and thus serves (on x86_64) as a
+-       * a read-only guard page for the SYSENTER stack at the bottom
+-       * of the TSS region.
++       * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as
++       * a a read-only guard page.
++       */
++      struct SYSENTER_stack_page SYSENTER_stack_page;
++
++      /*
++       * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
++       * we need task switches to work, and task switches write to the TSS.
+        */
+       struct tss_struct tss;
+ 
+@@ -227,7 +232,7 @@ static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
+ 
+ static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
+ {
+-      return &get_cpu_entry_area(cpu)->tss.SYSENTER_stack;
++      return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack;
+ }
+ 
+ #endif /* !__ASSEMBLY__ */
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 2d489a414a86..bccec7ed1676 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -334,13 +334,11 @@ struct SYSENTER_stack {
+       unsigned long           words[64];
+ };
+ 
+-struct tss_struct {
+-      /*
+-       * Space for the temporary SYSENTER stack, used for SYSENTER
+-       * and the entry trampoline as well.
+-       */
+-      struct SYSENTER_stack   SYSENTER_stack;
++struct SYSENTER_stack_page {
++      struct SYSENTER_stack stack;
++} __aligned(PAGE_SIZE);
+ 
++struct tss_struct {
+       /*
+        * The fixed hardware portion.  This must not cross a page boundary
+        * at risk of violating the SDM's advice and potentially triggering
+@@ -357,7 +355,7 @@ struct tss_struct {
+       unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
+ } __aligned(PAGE_SIZE);
+ 
+-DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss);
++DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
+ 
+ /*
+  * sizeof(unsigned long) coming from an extra "long" at the end
+@@ -372,7 +370,8 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss);
+ #ifdef CONFIG_X86_32
+ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+ #else
+-#define cpu_current_top_of_stack cpu_tss.x86_tss.sp1
++/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
++#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
+ #endif
+ 
+ /*
+@@ -532,7 +531,7 @@ static inline void native_set_iopl_mask(unsigned mask)
+ static inline void
+ native_load_sp0(unsigned long sp0)
+ {
+-      this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
++      this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
+ }
+ 
+ static inline void native_swapgs(void)
+diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
+index ca2fc84ad278..cfb6dfe4c457 100644
+--- a/arch/x86/include/asm/switch_to.h
++++ b/arch/x86/include/asm/switch_to.h
+@@ -78,10 +78,10 @@ do {                                                                       \
+ static inline void refresh_sysenter_cs(struct thread_struct *thread)
+ {
+       /* Only happens when SEP is enabled, no need to test "SEP"arately: */
+-      if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
++      if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
+               return;
+ 
+-      this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
++      this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
+       wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+ }
+ #endif
+diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
+index 760dd8a73927..6275b391ac61 100644
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -214,7 +214,7 @@ static inline int arch_within_stack_frames(const void * const stack,
+ #else /* !__ASSEMBLY__ */
+ 
+ #ifdef CONFIG_X86_64
+-# define cpu_current_top_of_stack (cpu_tss + TSS_sp1)
++# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
+ #endif
+ 
+ #endif
+diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
+index 00ea20bfa857..40c3fab107ac 100644
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -93,10 +93,9 @@ void common(void) {
+       BLANK();
+       DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+ 
+-      OFFSET(TSS_STRUCT_SYSENTER_stack, tss_struct, SYSENTER_stack);
+-      DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
+-
+       /* Layout info for cpu_entry_area */
+       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+       OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
++      OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page);
++      DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
+ }
+diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
+index d09b161a3bd0..c4f23da7a0f0 100644
+--- a/arch/x86/kernel/asm-offsets_32.c
++++ b/arch/x86/kernel/asm-offsets_32.c
+@@ -49,8 +49,8 @@ void foo(void)
+       BLANK();
+ 
+       /* Offset from the sysenter stack to tss.sp0 */
+-      DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
+-             offsetofend(struct tss_struct, SYSENTER_stack));
++      DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
++             offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack));
+ 
+ #ifdef CONFIG_CC_STACKPROTECTOR
+       BLANK();
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index f9541c48c290..7992e5a8076c 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -487,6 +487,9 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+ #endif
+ 
++static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
++                                 SYSENTER_stack_storage);
++
+ static void __init
+ set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
+ {
+@@ -500,23 +503,29 @@ static void __init setup_cpu_entry_area(int cpu)
+ #ifdef CONFIG_X86_64
+       extern char _entry_trampoline[];
+ 
+-      /* On 64-bit systems, we use a read-only fixmap GDT. */
++      /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+       pgprot_t gdt_prot = PAGE_KERNEL_RO;
++      pgprot_t tss_prot = PAGE_KERNEL_RO;
+ #else
+       /*
+        * On native 32-bit systems, the GDT cannot be read-only because
+        * our double fault handler uses a task gate, and entering through
+-       * a task gate needs to change an available TSS to busy.  If the GDT
+-       * is read-only, that will triple fault.
++       * a task gate needs to change an available TSS to busy.  If the
++       * GDT is read-only, that will triple fault.  The TSS cannot be
++       * read-only because the CPU writes to it on task switches.
+        *
+-       * On Xen PV, the GDT must be read-only because the hypervisor requires
+-       * it.
++       * On Xen PV, the GDT must be read-only because the hypervisor
++       * requires it.
+        */
+       pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+               PAGE_KERNEL_RO : PAGE_KERNEL;
++      pgprot_t tss_prot = PAGE_KERNEL;
+ #endif
+ 
+       __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
++      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
++                              per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
++                              PAGE_KERNEL);
+ 
+       /*
+        * The Intel SDM says (Volume 3, 7.2.1):
+@@ -539,9 +548,9 @@ static void __init setup_cpu_entry_area(int cpu)
+                     offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+       BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+       set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
+-                              &per_cpu(cpu_tss, cpu),
++                              &per_cpu(cpu_tss_rw, cpu),
+                               sizeof(struct tss_struct) / PAGE_SIZE,
+-                              PAGE_KERNEL);
++                              tss_prot);
+ 
+ #ifdef CONFIG_X86_32
+       per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+@@ -1297,7 +1306,7 @@ void enable_sep_cpu(void)
+               return;
+ 
+       cpu = get_cpu();
+-      tss = &per_cpu(cpu_tss, cpu);
++      tss = &per_cpu(cpu_tss_rw, cpu);
+ 
+       /*
+        * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
+@@ -1576,7 +1585,7 @@ void cpu_init(void)
+       if (cpu)
+               load_ucode_ap();
+ 
+-      t = &per_cpu(cpu_tss, cpu);
++      t = &per_cpu(cpu_tss_rw, cpu);
+       oist = &per_cpu(orig_ist, cpu);
+ 
+ #ifdef CONFIG_NUMA
+@@ -1667,7 +1676,7 @@ void cpu_init(void)
+ {
+       int cpu = smp_processor_id();
+       struct task_struct *curr = current;
+-      struct tss_struct *t = &per_cpu(cpu_tss, cpu);
++      struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
+ 
+       wait_for_master_cpu(cpu);
+ 
+diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
+index 4a613fed94b6..d13777d49d8b 100644
+--- a/arch/x86/kernel/ioport.c
++++ b/arch/x86/kernel/ioport.c
+@@ -66,7 +66,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+        * because the ->io_bitmap_max value must match the bitmap
+        * contents:
+        */
+-      tss = &per_cpu(cpu_tss, get_cpu());
++      tss = &per_cpu(cpu_tss_rw, get_cpu());
+ 
+       if (turn_on)
+               bitmap_clear(t->io_bitmap_ptr, from, num);
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
+index ec758390d24e..3688a7b9d055 100644
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -46,7 +46,7 @@
+  * section. Since TSS's are completely CPU-local, we want them
+  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+  */
+-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
++__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
+       .x86_tss = {
+               /*
+                * .sp0 is only used when entering ring 0 from a lower
+@@ -81,7 +81,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+       .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
+ #endif
+ };
+-EXPORT_PER_CPU_SYMBOL(cpu_tss);
++EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
+ 
+ DEFINE_PER_CPU(bool, __tss_limit_invalid);
+ EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
+@@ -110,7 +110,7 @@ void exit_thread(struct task_struct *tsk)
+       struct fpu *fpu = &t->fpu;
+ 
+       if (bp) {
+-              struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
++              struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
+ 
+               t->io_bitmap_ptr = NULL;
+               clear_thread_flag(TIF_IO_BITMAP);
+diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
+index c0d60420466c..784ff9147172 100644
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+       struct fpu *prev_fpu = &prev->fpu;
+       struct fpu *next_fpu = &next->fpu;
+       int cpu = smp_processor_id();
+-      struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
++      struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
+ 
+       /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
+ 
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index 157f81816915..c75466232016 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -399,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+       struct fpu *prev_fpu = &prev->fpu;
+       struct fpu *next_fpu = &next->fpu;
+       int cpu = smp_processor_id();
+-      struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
++      struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
+ 
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
+                    this_cpu_read(irq_count) != -1);
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index 2818c83892b3..14b462eefa17 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -376,7 +376,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+               regs->cs == __KERNEL_CS &&
+               regs->ip == (unsigned long)native_irq_return_iret)
+       {
+-              struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1;
++              struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
+ 
+               /*
+                * regs->sp points to the failing IRET frame on the
+@@ -661,7 +661,7 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
+        * exception came from the IRET target.
+        */
+       struct bad_iret_stack *new_stack =
+-              (struct bad_iret_stack *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1;
++              (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
+ 
+       /* Copy the IRET target to the new stack. */
+       memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
+diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
+index 29df077cb089..cf2ac227c2ac 100644
+--- a/arch/x86/lib/delay.c
++++ b/arch/x86/lib/delay.c
+@@ -106,10 +106,10 @@ static void delay_mwaitx(unsigned long __loops)
+               delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
+ 
+               /*
+-               * Use cpu_tss as a cacheline-aligned, seldomly
++               * Use cpu_tss_rw as a cacheline-aligned, seldomly
+                * accessed per-cpu variable as the monitor target.
+                */
+-              __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
++              __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
+ 
+               /*
+                * AMD, like Intel, supports the EAX hint and EAX=0xf
+diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
+index 63c81154083b..3b76cf85e306 100644
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -817,7 +817,7 @@ static void xen_load_sp0(unsigned long sp0)
+       mcs = xen_mc_entry(0);
+       MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
+       xen_mc_issue(PARAVIRT_LAZY_CPU);
+-      this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
++      this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
+ }
+ 
+ void xen_set_iopl_mask(unsigned mask)
+diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
+index 04abcd3f8e2d..3ef7800007f8 100644
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -949,7 +949,7 @@ ENTRY(debug)
+ 
+       /* Are we currently on the SYSENTER stack? */
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+-      addl    $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
++      addl    $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       jb      .Ldebug_from_sysenter_stack
+@@ -993,7 +993,7 @@ ENTRY(nmi)
+ 
+       /* Are we currently on the SYSENTER stack? */
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+-      addl    $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
++      addl    $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       jb      .Lnmi_from_sysenter_stack
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 7a5e9edcdaf4..157860b3569f 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -153,7 +153,7 @@ END(native_usergs_sysret64)
+       _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+ 
+ /* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+-#define RSP_SCRATCH   CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + \
++#define RSP_SCRATCH   CPU_ENTRY_AREA_SYSENTER_stack + \
+                       SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
+ 
+ ENTRY(entry_SYSCALL_64_trampoline)
+@@ -389,7 +389,7 @@ syscall_return_via_sysret:
+        * Save old stack pointer and switch to trampoline stack.
+        */
+       movq    %rsp, %rdi
+-      movq    PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
++      movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+ 
+       pushq   RSP-RDI(%rdi)   /* RSP */
+       pushq   (%rdi)          /* RDI */
+@@ -718,7 +718,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+        * Save old stack pointer and switch to trampoline stack.
+        */
+       movq    %rsp, %rdi
+-      movq    PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
++      movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+ 
+       /* Copy the IRET frame to the trampoline stack. */
+       pushq   6*8(%rdi)       /* SS */
+@@ -946,7 +946,7 @@ apicinterrupt IRQ_WORK_VECTOR                      irq_work_interrupt              smp_irq_work_interrupt
+ /*
+  * Exception entry points.
+  */
+-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
++#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+ 
+ /*
+  * Switch to the thread stack.  This is called with the IRET frame and
+-- 
+2.14.2
+
diff --git a/patches/kernel/0162-x86-paravirt-Dont-patch-flush_tlb_single.patch b/patches/kernel/0162-x86-paravirt-Dont-patch-flush_tlb_single.patch

deleted file mode 100644 (file)

index 638e7f0..0000000
--- a/patches/kernel/0162-x86-paravirt-Dont-patch-flush_tlb_single.patch
+++ /dev/null
@@ -1,77 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon, 4 Dec 2017 15:07:30 +0100
-Subject: [PATCH] x86/paravirt: Dont patch flush_tlb_single
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-native_flush_tlb_single() will be changed with the upcoming
-PAGE_TABLE_ISOLATION feature. This requires to have more code in
-there than INVLPG.
-
-Remove the paravirt patching for it.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Reviewed-by: Juergen Gross <jgross@suse.com>
-Acked-by: Peter Zijlstra <peterz@infradead.org>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Cc: michael.schwarz@iaik.tugraz.at
-Cc: moritz.lipp@iaik.tugraz.at
-Cc: richard.fellner@student.tugraz.at
-Link: https://lkml.kernel.org/r/20171204150606.828111617@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit a035795499ca1c2bd1928808d1a156eda1420383)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 435d79a109b8c04d76a6cdb32b9b49a262f75e61)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/paravirt_patch_64.c | 2 --
- 1 file changed, 2 deletions(-)
-
-diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
-index 11aaf1eaa0e4..c354833342bd 100644
---- a/arch/x86/kernel/paravirt_patch_64.c
-+++ b/arch/x86/kernel/paravirt_patch_64.c
-@@ -9,7 +9,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
- DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
- DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
- DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
--DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
- DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
- 
- DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
-@@ -59,7 +58,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
-               PATCH_SITE(pv_mmu_ops, read_cr2);
-               PATCH_SITE(pv_mmu_ops, read_cr3);
-               PATCH_SITE(pv_mmu_ops, write_cr3);
--              PATCH_SITE(pv_mmu_ops, flush_tlb_single);
-               PATCH_SITE(pv_cpu_ops, wbinvd);
- #if defined(CONFIG_PARAVIRT_SPINLOCKS)
-               case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
--- 
-2.14.2
-
diff --git a/patches/kernel/0163-x86-paravirt-Dont-patch-flush_tlb_single.patch b/patches/kernel/0163-x86-paravirt-Dont-patch-flush_tlb_single.patch

new file mode 100644 (file)

index 0000000..638e7f0
--- /dev/null
+++ b/patches/kernel/0163-x86-paravirt-Dont-patch-flush_tlb_single.patch
@@ -0,0 +1,77 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:30 +0100
+Subject: [PATCH] x86/paravirt: Dont patch flush_tlb_single
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+native_flush_tlb_single() will be changed with the upcoming
+PAGE_TABLE_ISOLATION feature. This requires to have more code in
+there than INVLPG.
+
+Remove the paravirt patching for it.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Acked-by: Peter Zijlstra <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Cc: michael.schwarz@iaik.tugraz.at
+Cc: moritz.lipp@iaik.tugraz.at
+Cc: richard.fellner@student.tugraz.at
+Link: https://lkml.kernel.org/r/20171204150606.828111617@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit a035795499ca1c2bd1928808d1a156eda1420383)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 435d79a109b8c04d76a6cdb32b9b49a262f75e61)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/paravirt_patch_64.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
+index 11aaf1eaa0e4..c354833342bd 100644
+--- a/arch/x86/kernel/paravirt_patch_64.c
++++ b/arch/x86/kernel/paravirt_patch_64.c
+@@ -9,7 +9,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
+ DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
+ DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
+ DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
+-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
+ DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
+ 
+ DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
+@@ -59,7 +58,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
+               PATCH_SITE(pv_mmu_ops, read_cr2);
+               PATCH_SITE(pv_mmu_ops, read_cr3);
+               PATCH_SITE(pv_mmu_ops, write_cr3);
+-              PATCH_SITE(pv_mmu_ops, flush_tlb_single);
+               PATCH_SITE(pv_cpu_ops, wbinvd);
+ #if defined(CONFIG_PARAVIRT_SPINLOCKS)
+               case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+-- 
+2.14.2
+
diff --git a/patches/kernel/0163-x86-paravirt-Provide-a-way-to-check-for-hypervisors.patch b/patches/kernel/0163-x86-paravirt-Provide-a-way-to-check-for-hypervisors.patch

deleted file mode 100644 (file)

index d268b6e..0000000
--- a/patches/kernel/0163-x86-paravirt-Provide-a-way-to-check-for-hypervisors.patch
+++ /dev/null
@@ -1,105 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon, 4 Dec 2017 15:07:31 +0100
-Subject: [PATCH] x86/paravirt: Provide a way to check for hypervisors
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-There is no generic way to test whether a kernel is running on a specific
-hypervisor. But that's required to prevent the upcoming user address space
-separation feature in certain guest modes.
-
-Make the hypervisor type enum unconditionally available and provide a
-helper function which allows to test for a specific type.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Juergen Gross <jgross@suse.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150606.912938129@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 79cc74155218316b9a5d28577c7077b2adba8e58)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 9f637574068f1ffdaded1cd1f408917582594b36)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/hypervisor.h | 25 +++++++++++++++----------
- 1 file changed, 15 insertions(+), 10 deletions(-)
-
-diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
-index 1b0a5abcd8ae..96aa6b9884dc 100644
---- a/arch/x86/include/asm/hypervisor.h
-+++ b/arch/x86/include/asm/hypervisor.h
-@@ -20,16 +20,7 @@
- #ifndef _ASM_X86_HYPERVISOR_H
- #define _ASM_X86_HYPERVISOR_H
- 
--#ifdef CONFIG_HYPERVISOR_GUEST
--
--#include <asm/kvm_para.h>
--#include <asm/x86_init.h>
--#include <asm/xen/hypervisor.h>
--
--/*
-- * x86 hypervisor information
-- */
--
-+/* x86 hypervisor types  */
- enum x86_hypervisor_type {
-       X86_HYPER_NATIVE = 0,
-       X86_HYPER_VMWARE,
-@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
-       X86_HYPER_KVM,
- };
- 
-+#ifdef CONFIG_HYPERVISOR_GUEST
-+
-+#include <asm/kvm_para.h>
-+#include <asm/x86_init.h>
-+#include <asm/xen/hypervisor.h>
-+
- struct hypervisor_x86 {
-       /* Hypervisor name */
-       const char      *name;
-@@ -58,7 +55,15 @@ struct hypervisor_x86 {
- 
- extern enum x86_hypervisor_type x86_hyper_type;
- extern void init_hypervisor_platform(void);
-+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
-+{
-+      return x86_hyper_type == type;
-+}
- #else
- static inline void init_hypervisor_platform(void) { }
-+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
-+{
-+      return type == X86_HYPER_NATIVE;
-+}
- #endif /* CONFIG_HYPERVISOR_GUEST */
- #endif /* _ASM_X86_HYPERVISOR_H */
--- 
-2.14.2
-
diff --git a/patches/kernel/0164-x86-cpufeatures-Make-CPU-bugs-sticky.patch b/patches/kernel/0164-x86-cpufeatures-Make-CPU-bugs-sticky.patch

deleted file mode 100644 (file)

index eaa7c6d..0000000
--- a/patches/kernel/0164-x86-cpufeatures-Make-CPU-bugs-sticky.patch
+++ /dev/null
@@ -1,108 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon, 4 Dec 2017 15:07:32 +0100
-Subject: [PATCH] x86/cpufeatures: Make CPU bugs sticky
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-There is currently no way to force CPU bug bits like CPU feature bits. That
-makes it impossible to set a bug bit once at boot and have it stick for all
-upcoming CPUs.
-
-Extend the force set/clear arrays to handle bug bits as well.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171204150606.992156574@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 6cbd2171e89b13377261d15e64384df60ecb530e)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit aab40a666a40cd015ca4a53231bed544fc679dcb)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/cpufeature.h | 2 ++
- arch/x86/include/asm/processor.h  | 4 ++--
- arch/x86/kernel/cpu/common.c      | 6 +++---
- 3 files changed, 7 insertions(+), 5 deletions(-)
-
-diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
-index 225fd8374fae..8b9915561ed1 100644
---- a/arch/x86/include/asm/cpufeature.h
-+++ b/arch/x86/include/asm/cpufeature.h
-@@ -134,6 +134,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
-       set_bit(bit, (unsigned long *)cpu_caps_set);    \
- } while (0)
- 
-+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
-+
- #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
- /*
-  * Static testing of CPU features.  Used the same as boot_cpu_has().
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index bccec7ed1676..59a317f8e0ec 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -162,8 +162,8 @@ extern struct cpuinfo_x86  new_cpu_data;
- #include <linux/thread_info.h>
- 
- extern struct x86_hw_tss      doublefault_tss;
--extern __u32                  cpu_caps_cleared[NCAPINTS];
--extern __u32                  cpu_caps_set[NCAPINTS];
-+extern __u32                  cpu_caps_cleared[NCAPINTS + NBUGINTS];
-+extern __u32                  cpu_caps_set[NCAPINTS + NBUGINTS];
- 
- #ifdef CONFIG_SMP
- DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 7992e5a8076c..fcdba90e0890 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -452,8 +452,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
-       return NULL;            /* Not found */
- }
- 
--__u32 cpu_caps_cleared[NCAPINTS];
--__u32 cpu_caps_set[NCAPINTS];
-+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
-+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
- 
- void load_percpu_segment(int cpu)
- {
-@@ -812,7 +812,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
- {
-       int i;
- 
--      for (i = 0; i < NCAPINTS; i++) {
-+      for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
-               c->x86_capability[i] &= ~cpu_caps_cleared[i];
-               c->x86_capability[i] |= cpu_caps_set[i];
-       }
--- 
-2.14.2
-
diff --git a/patches/kernel/0164-x86-paravirt-Provide-a-way-to-check-for-hypervisors.patch b/patches/kernel/0164-x86-paravirt-Provide-a-way-to-check-for-hypervisors.patch

new file mode 100644 (file)

index 0000000..d268b6e
--- /dev/null
+++ b/patches/kernel/0164-x86-paravirt-Provide-a-way-to-check-for-hypervisors.patch
@@ -0,0 +1,105 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:31 +0100
+Subject: [PATCH] x86/paravirt: Provide a way to check for hypervisors
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+There is no generic way to test whether a kernel is running on a specific
+hypervisor. But that's required to prevent the upcoming user address space
+separation feature in certain guest modes.
+
+Make the hypervisor type enum unconditionally available and provide a
+helper function which allows to test for a specific type.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.912938129@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 79cc74155218316b9a5d28577c7077b2adba8e58)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 9f637574068f1ffdaded1cd1f408917582594b36)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/hypervisor.h | 25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
+index 1b0a5abcd8ae..96aa6b9884dc 100644
+--- a/arch/x86/include/asm/hypervisor.h
++++ b/arch/x86/include/asm/hypervisor.h
+@@ -20,16 +20,7 @@
+ #ifndef _ASM_X86_HYPERVISOR_H
+ #define _ASM_X86_HYPERVISOR_H
+ 
+-#ifdef CONFIG_HYPERVISOR_GUEST
+-
+-#include <asm/kvm_para.h>
+-#include <asm/x86_init.h>
+-#include <asm/xen/hypervisor.h>
+-
+-/*
+- * x86 hypervisor information
+- */
+-
++/* x86 hypervisor types  */
+ enum x86_hypervisor_type {
+       X86_HYPER_NATIVE = 0,
+       X86_HYPER_VMWARE,
+@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
+       X86_HYPER_KVM,
+ };
+ 
++#ifdef CONFIG_HYPERVISOR_GUEST
++
++#include <asm/kvm_para.h>
++#include <asm/x86_init.h>
++#include <asm/xen/hypervisor.h>
++
+ struct hypervisor_x86 {
+       /* Hypervisor name */
+       const char      *name;
+@@ -58,7 +55,15 @@ struct hypervisor_x86 {
+ 
+ extern enum x86_hypervisor_type x86_hyper_type;
+ extern void init_hypervisor_platform(void);
++static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
++{
++      return x86_hyper_type == type;
++}
+ #else
+ static inline void init_hypervisor_platform(void) { }
++static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
++{
++      return type == X86_HYPER_NATIVE;
++}
+ #endif /* CONFIG_HYPERVISOR_GUEST */
+ #endif /* _ASM_X86_HYPERVISOR_H */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0165-x86-Kconfig-Limit-NR_CPUS-on-32-bit-to-a-sane-amount.patch b/patches/kernel/0165-x86-Kconfig-Limit-NR_CPUS-on-32-bit-to-a-sane-amount.patch

deleted file mode 100644 (file)

index 69f21bc..0000000
--- a/patches/kernel/0165-x86-Kconfig-Limit-NR_CPUS-on-32-bit-to-a-sane-amount.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 20 Dec 2017 18:02:34 +0100
-Subject: [PATCH] x86/Kconfig: Limit NR_CPUS on 32-bit to a sane amount
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The recent cpu_entry_area changes fail to compile on 32-bit when BIGSMP=y
-and NR_CPUS=512, because the fixmap area becomes too big.
-
-Limit the number of CPUs with BIGSMP to 64, which is already way to big for
-32-bit, but it's at least a working limitation.
-
-We performed a quick survey of 32-bit-only machines that might be affected
-by this change negatively, but found none.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: linux-kernel@vger.kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 7bbcbd3d1cdcbacd0f9f8dc4c98d550972f1ca30)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 8ea88ee6f0d058835bfb5685be1ec1beb51177c2)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/Kconfig | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
-index 8b5499bb24bb..51003e53e738 100644
---- a/arch/x86/Kconfig
-+++ b/arch/x86/Kconfig
-@@ -923,7 +923,8 @@ config MAXSMP
- config NR_CPUS
-       int "Maximum number of CPUs" if SMP && !MAXSMP
-       range 2 8 if SMP && X86_32 && !X86_BIGSMP
--      range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
-+      range 2 64 if SMP && X86_32 && X86_BIGSMP
-+      range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
-       range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
-       default "1" if !SMP
-       default "8192" if MAXSMP
--- 
-2.14.2
-
diff --git a/patches/kernel/0165-x86-cpufeatures-Make-CPU-bugs-sticky.patch b/patches/kernel/0165-x86-cpufeatures-Make-CPU-bugs-sticky.patch

new file mode 100644 (file)

index 0000000..eaa7c6d
--- /dev/null
+++ b/patches/kernel/0165-x86-cpufeatures-Make-CPU-bugs-sticky.patch
@@ -0,0 +1,108 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:32 +0100
+Subject: [PATCH] x86/cpufeatures: Make CPU bugs sticky
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+There is currently no way to force CPU bug bits like CPU feature bits. That
+makes it impossible to set a bug bit once at boot and have it stick for all
+upcoming CPUs.
+
+Extend the force set/clear arrays to handle bug bits as well.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.992156574@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 6cbd2171e89b13377261d15e64384df60ecb530e)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit aab40a666a40cd015ca4a53231bed544fc679dcb)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/cpufeature.h | 2 ++
+ arch/x86/include/asm/processor.h  | 4 ++--
+ arch/x86/kernel/cpu/common.c      | 6 +++---
+ 3 files changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
+index 225fd8374fae..8b9915561ed1 100644
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -134,6 +134,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
+       set_bit(bit, (unsigned long *)cpu_caps_set);    \
+ } while (0)
+ 
++#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
++
+ #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
+ /*
+  * Static testing of CPU features.  Used the same as boot_cpu_has().
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index bccec7ed1676..59a317f8e0ec 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -162,8 +162,8 @@ extern struct cpuinfo_x86  new_cpu_data;
+ #include <linux/thread_info.h>
+ 
+ extern struct x86_hw_tss      doublefault_tss;
+-extern __u32                  cpu_caps_cleared[NCAPINTS];
+-extern __u32                  cpu_caps_set[NCAPINTS];
++extern __u32                  cpu_caps_cleared[NCAPINTS + NBUGINTS];
++extern __u32                  cpu_caps_set[NCAPINTS + NBUGINTS];
+ 
+ #ifdef CONFIG_SMP
+ DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 7992e5a8076c..fcdba90e0890 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -452,8 +452,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
+       return NULL;            /* Not found */
+ }
+ 
+-__u32 cpu_caps_cleared[NCAPINTS];
+-__u32 cpu_caps_set[NCAPINTS];
++__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
++__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
+ 
+ void load_percpu_segment(int cpu)
+ {
+@@ -812,7 +812,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
+ {
+       int i;
+ 
+-      for (i = 0; i < NCAPINTS; i++) {
++      for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
+               c->x86_capability[i] &= ~cpu_caps_cleared[i];
+               c->x86_capability[i] |= cpu_caps_set[i];
+       }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0166-x86-Kconfig-Limit-NR_CPUS-on-32-bit-to-a-sane-amount.patch b/patches/kernel/0166-x86-Kconfig-Limit-NR_CPUS-on-32-bit-to-a-sane-amount.patch

new file mode 100644 (file)

index 0000000..69f21bc
--- /dev/null
+++ b/patches/kernel/0166-x86-Kconfig-Limit-NR_CPUS-on-32-bit-to-a-sane-amount.patch
@@ -0,0 +1,56 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 20 Dec 2017 18:02:34 +0100
+Subject: [PATCH] x86/Kconfig: Limit NR_CPUS on 32-bit to a sane amount
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The recent cpu_entry_area changes fail to compile on 32-bit when BIGSMP=y
+and NR_CPUS=512, because the fixmap area becomes too big.
+
+Limit the number of CPUs with BIGSMP to 64, which is already way to big for
+32-bit, but it's at least a working limitation.
+
+We performed a quick survey of 32-bit-only machines that might be affected
+by this change negatively, but found none.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 7bbcbd3d1cdcbacd0f9f8dc4c98d550972f1ca30)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 8ea88ee6f0d058835bfb5685be1ec1beb51177c2)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/Kconfig | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 8b5499bb24bb..51003e53e738 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -923,7 +923,8 @@ config MAXSMP
+ config NR_CPUS
+       int "Maximum number of CPUs" if SMP && !MAXSMP
+       range 2 8 if SMP && X86_32 && !X86_BIGSMP
+-      range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
++      range 2 64 if SMP && X86_32 && X86_BIGSMP
++      range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
+       range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
+       default "1" if !SMP
+       default "8192" if MAXSMP
+-- 
+2.14.2
+
diff --git a/patches/kernel/0166-x86-mm-dump_pagetables-Check-PAGE_PRESENT-for-real.patch b/patches/kernel/0166-x86-mm-dump_pagetables-Check-PAGE_PRESENT-for-real.patch

deleted file mode 100644 (file)

index b8205ad..0000000
--- a/patches/kernel/0166-x86-mm-dump_pagetables-Check-PAGE_PRESENT-for-real.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Sat, 16 Dec 2017 01:14:39 +0100
-Subject: [PATCH] x86/mm/dump_pagetables: Check PAGE_PRESENT for real
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The check for a present page in printk_prot():
-
-       if (!pgprot_val(prot)) {
-                /* Not present */
-
-is bogus. If a PTE is set to PAGE_NONE then the pgprot_val is not zero and
-the entry is decoded in bogus ways, e.g. as RX GLB. That is confusing when
-analyzing mapping correctness. Check for the present bit to make an
-informed decision.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: linux-kernel@vger.kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit c05344947b37f7cda726e802457370bc6eac4d26)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d902780eaea12f23b50be4ff00f8df6157c30e4a)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/dump_pagetables.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
-index 0470826d2bdc..91aa41c5e0dd 100644
---- a/arch/x86/mm/dump_pagetables.c
-+++ b/arch/x86/mm/dump_pagetables.c
-@@ -140,7 +140,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
-       static const char * const level_name[] =
-               { "cr3", "pgd", "pud", "pmd", "pte" };
- 
--      if (!pgprot_val(prot)) {
-+      if (!(pr & _PAGE_PRESENT)) {
-               /* Not present */
-               pt_dump_cont_printf(m, dmsg, "                              ");
-       } else {
--- 
-2.14.2
-
diff --git a/patches/kernel/0167-x86-mm-dump_pagetables-Check-PAGE_PRESENT-for-real.patch b/patches/kernel/0167-x86-mm-dump_pagetables-Check-PAGE_PRESENT-for-real.patch

new file mode 100644 (file)

index 0000000..b8205ad
--- /dev/null
+++ b/patches/kernel/0167-x86-mm-dump_pagetables-Check-PAGE_PRESENT-for-real.patch
@@ -0,0 +1,56 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 16 Dec 2017 01:14:39 +0100
+Subject: [PATCH] x86/mm/dump_pagetables: Check PAGE_PRESENT for real
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The check for a present page in printk_prot():
+
+       if (!pgprot_val(prot)) {
+                /* Not present */
+
+is bogus. If a PTE is set to PAGE_NONE then the pgprot_val is not zero and
+the entry is decoded in bogus ways, e.g. as RX GLB. That is confusing when
+analyzing mapping correctness. Check for the present bit to make an
+informed decision.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit c05344947b37f7cda726e802457370bc6eac4d26)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d902780eaea12f23b50be4ff00f8df6157c30e4a)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/dump_pagetables.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
+index 0470826d2bdc..91aa41c5e0dd 100644
+--- a/arch/x86/mm/dump_pagetables.c
++++ b/arch/x86/mm/dump_pagetables.c
+@@ -140,7 +140,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
+       static const char * const level_name[] =
+               { "cr3", "pgd", "pud", "pmd", "pte" };
+ 
+-      if (!pgprot_val(prot)) {
++      if (!(pr & _PAGE_PRESENT)) {
+               /* Not present */
+               pt_dump_cont_printf(m, dmsg, "                              ");
+       } else {
+-- 
+2.14.2
+
diff --git a/patches/kernel/0167-x86-mm-dump_pagetables-Make-the-address-hints-correc.patch b/patches/kernel/0167-x86-mm-dump_pagetables-Make-the-address-hints-correc.patch

deleted file mode 100644 (file)

index 6b6716b..0000000
--- a/patches/kernel/0167-x86-mm-dump_pagetables-Make-the-address-hints-correc.patch
+++ /dev/null
@@ -1,169 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 20 Dec 2017 18:07:42 +0100
-Subject: [PATCH] x86/mm/dump_pagetables: Make the address hints correct and
- readable
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The address hints are a trainwreck. The array entry numbers have to kept
-magically in sync with the actual hints, which is doomed as some of the
-array members are initialized at runtime via the entry numbers.
-
-Designated initializers have been around before this code was
-implemented....
-
-Use the entry numbers to populate the address hints array and add the
-missing bits and pieces. Split 32 and 64 bit for readability sake.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: linux-kernel@vger.kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 146122e24bdf208015d629babba673e28d090709)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 7f4d9163531183fbaa0df1d1b1ceecbade4e58dc)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/dump_pagetables.c | 90 +++++++++++++++++++++++++------------------
- 1 file changed, 53 insertions(+), 37 deletions(-)
-
-diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
-index 91aa41c5e0dd..318a7c30e87e 100644
---- a/arch/x86/mm/dump_pagetables.c
-+++ b/arch/x86/mm/dump_pagetables.c
-@@ -44,10 +44,12 @@ struct addr_marker {
-       unsigned long max_lines;
- };
- 
--/* indices for address_markers; keep sync'd w/ address_markers below */
-+/* Address space markers hints */
-+
-+#ifdef CONFIG_X86_64
-+
- enum address_markers_idx {
-       USER_SPACE_NR = 0,
--#ifdef CONFIG_X86_64
-       KERNEL_SPACE_NR,
-       LOW_KERNEL_NR,
-       VMALLOC_START_NR,
-@@ -56,56 +58,70 @@ enum address_markers_idx {
-       KASAN_SHADOW_START_NR,
-       KASAN_SHADOW_END_NR,
- #endif
--# ifdef CONFIG_X86_ESPFIX64
-+#ifdef CONFIG_X86_ESPFIX64
-       ESPFIX_START_NR,
--# endif
-+#endif
-+#ifdef CONFIG_EFI
-+      EFI_END_NR,
-+#endif
-       HIGH_KERNEL_NR,
-       MODULES_VADDR_NR,
-       MODULES_END_NR,
--#else
-+      FIXADDR_START_NR,
-+      END_OF_SPACE_NR,
-+};
-+
-+static struct addr_marker address_markers[] = {
-+      [USER_SPACE_NR]         = { 0,                  "User Space" },
-+      [KERNEL_SPACE_NR]       = { (1UL << 63),        "Kernel Space" },
-+      [LOW_KERNEL_NR]         = { 0UL,                "Low Kernel Mapping" },
-+      [VMALLOC_START_NR]      = { 0UL,                "vmalloc() Area" },
-+      [VMEMMAP_START_NR]      = { 0UL,                "Vmemmap" },
-+#ifdef CONFIG_KASAN
-+      [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
-+      [KASAN_SHADOW_END_NR]   = { KASAN_SHADOW_END,   "KASAN shadow end" },
-+#endif
-+#ifdef CONFIG_X86_ESPFIX64
-+      [ESPFIX_START_NR]       = { ESPFIX_BASE_ADDR,   "ESPfix Area", 16 },
-+#endif
-+#ifdef CONFIG_EFI
-+      [EFI_END_NR]            = { EFI_VA_END,         "EFI Runtime Services" },
-+#endif
-+      [HIGH_KERNEL_NR]        = { __START_KERNEL_map, "High Kernel Mapping" },
-+      [MODULES_VADDR_NR]      = { MODULES_VADDR,      "Modules" },
-+      [MODULES_END_NR]        = { MODULES_END,        "End Modules" },
-+      [FIXADDR_START_NR]      = { FIXADDR_START,      "Fixmap Area" },
-+      [END_OF_SPACE_NR]       = { -1,                 NULL }
-+};
-+
-+#else /* CONFIG_X86_64 */
-+
-+enum address_markers_idx {
-+      USER_SPACE_NR = 0,
-       KERNEL_SPACE_NR,
-       VMALLOC_START_NR,
-       VMALLOC_END_NR,
--# ifdef CONFIG_HIGHMEM
-+#ifdef CONFIG_HIGHMEM
-       PKMAP_BASE_NR,
--# endif
--      FIXADDR_START_NR,
- #endif
-+      FIXADDR_START_NR,
-+      END_OF_SPACE_NR,
- };
- 
--/* Address space markers hints */
- static struct addr_marker address_markers[] = {
--      { 0, "User Space" },
--#ifdef CONFIG_X86_64
--      { 0x8000000000000000UL, "Kernel Space" },
--      { 0/* PAGE_OFFSET */,   "Low Kernel Mapping" },
--      { 0/* VMALLOC_START */, "vmalloc() Area" },
--      { 0/* VMEMMAP_START */, "Vmemmap" },
--#ifdef CONFIG_KASAN
--      { KASAN_SHADOW_START,   "KASAN shadow" },
--      { KASAN_SHADOW_END,     "KASAN shadow end" },
-+      [USER_SPACE_NR]         = { 0,                  "User Space" },
-+      [KERNEL_SPACE_NR]       = { PAGE_OFFSET,        "Kernel Mapping" },
-+      [VMALLOC_START_NR]      = { 0UL,                "vmalloc() Area" },
-+      [VMALLOC_END_NR]        = { 0UL,                "vmalloc() End" },
-+#ifdef CONFIG_HIGHMEM
-+      [PKMAP_BASE_NR]         = { 0UL,                "Persistent kmap() Area" },
- #endif
--# ifdef CONFIG_X86_ESPFIX64
--      { ESPFIX_BASE_ADDR,     "ESPfix Area", 16 },
--# endif
--# ifdef CONFIG_EFI
--      { EFI_VA_END,           "EFI Runtime Services" },
--# endif
--      { __START_KERNEL_map,   "High Kernel Mapping" },
--      { MODULES_VADDR,        "Modules" },
--      { MODULES_END,          "End Modules" },
--#else
--      { PAGE_OFFSET,          "Kernel Mapping" },
--      { 0/* VMALLOC_START */, "vmalloc() Area" },
--      { 0/*VMALLOC_END*/,     "vmalloc() End" },
--# ifdef CONFIG_HIGHMEM
--      { 0/*PKMAP_BASE*/,      "Persistent kmap() Area" },
--# endif
--      { 0/*FIXADDR_START*/,   "Fixmap Area" },
--#endif
--      { -1, NULL }            /* End of list */
-+      [FIXADDR_START_NR]      = { 0UL,                "Fixmap area" },
-+      [END_OF_SPACE_NR]       = { -1,                 NULL }
- };
- 
-+#endif /* !CONFIG_X86_64 */
-+
- /* Multipliers for offsets within the PTEs */
- #define PTE_LEVEL_MULT (PAGE_SIZE)
- #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
--- 
-2.14.2
-
diff --git a/patches/kernel/0168-x86-mm-dump_pagetables-Make-the-address-hints-correc.patch b/patches/kernel/0168-x86-mm-dump_pagetables-Make-the-address-hints-correc.patch

new file mode 100644 (file)

index 0000000..6b6716b
--- /dev/null
+++ b/patches/kernel/0168-x86-mm-dump_pagetables-Make-the-address-hints-correc.patch
@@ -0,0 +1,169 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 20 Dec 2017 18:07:42 +0100
+Subject: [PATCH] x86/mm/dump_pagetables: Make the address hints correct and
+ readable
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The address hints are a trainwreck. The array entry numbers have to be kept
+magically in sync with the actual hints, which is doomed as some of the
+array members are initialized at runtime via the entry numbers.
+
+Designated initializers have been around before this code was
+implemented....
+
+Use the entry numbers to populate the address hints array and add the
+missing bits and pieces. Split 32 and 64 bit for readability's sake.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 146122e24bdf208015d629babba673e28d090709)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 7f4d9163531183fbaa0df1d1b1ceecbade4e58dc)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/dump_pagetables.c | 90 +++++++++++++++++++++++++------------------
+ 1 file changed, 53 insertions(+), 37 deletions(-)
+
+diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
+index 91aa41c5e0dd..318a7c30e87e 100644
+--- a/arch/x86/mm/dump_pagetables.c
++++ b/arch/x86/mm/dump_pagetables.c
+@@ -44,10 +44,12 @@ struct addr_marker {
+       unsigned long max_lines;
+ };
+ 
+-/* indices for address_markers; keep sync'd w/ address_markers below */
++/* Address space markers hints */
++
++#ifdef CONFIG_X86_64
++
+ enum address_markers_idx {
+       USER_SPACE_NR = 0,
+-#ifdef CONFIG_X86_64
+       KERNEL_SPACE_NR,
+       LOW_KERNEL_NR,
+       VMALLOC_START_NR,
+@@ -56,56 +58,70 @@ enum address_markers_idx {
+       KASAN_SHADOW_START_NR,
+       KASAN_SHADOW_END_NR,
+ #endif
+-# ifdef CONFIG_X86_ESPFIX64
++#ifdef CONFIG_X86_ESPFIX64
+       ESPFIX_START_NR,
+-# endif
++#endif
++#ifdef CONFIG_EFI
++      EFI_END_NR,
++#endif
+       HIGH_KERNEL_NR,
+       MODULES_VADDR_NR,
+       MODULES_END_NR,
+-#else
++      FIXADDR_START_NR,
++      END_OF_SPACE_NR,
++};
++
++static struct addr_marker address_markers[] = {
++      [USER_SPACE_NR]         = { 0,                  "User Space" },
++      [KERNEL_SPACE_NR]       = { (1UL << 63),        "Kernel Space" },
++      [LOW_KERNEL_NR]         = { 0UL,                "Low Kernel Mapping" },
++      [VMALLOC_START_NR]      = { 0UL,                "vmalloc() Area" },
++      [VMEMMAP_START_NR]      = { 0UL,                "Vmemmap" },
++#ifdef CONFIG_KASAN
++      [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
++      [KASAN_SHADOW_END_NR]   = { KASAN_SHADOW_END,   "KASAN shadow end" },
++#endif
++#ifdef CONFIG_X86_ESPFIX64
++      [ESPFIX_START_NR]       = { ESPFIX_BASE_ADDR,   "ESPfix Area", 16 },
++#endif
++#ifdef CONFIG_EFI
++      [EFI_END_NR]            = { EFI_VA_END,         "EFI Runtime Services" },
++#endif
++      [HIGH_KERNEL_NR]        = { __START_KERNEL_map, "High Kernel Mapping" },
++      [MODULES_VADDR_NR]      = { MODULES_VADDR,      "Modules" },
++      [MODULES_END_NR]        = { MODULES_END,        "End Modules" },
++      [FIXADDR_START_NR]      = { FIXADDR_START,      "Fixmap Area" },
++      [END_OF_SPACE_NR]       = { -1,                 NULL }
++};
++
++#else /* CONFIG_X86_64 */
++
++enum address_markers_idx {
++      USER_SPACE_NR = 0,
+       KERNEL_SPACE_NR,
+       VMALLOC_START_NR,
+       VMALLOC_END_NR,
+-# ifdef CONFIG_HIGHMEM
++#ifdef CONFIG_HIGHMEM
+       PKMAP_BASE_NR,
+-# endif
+-      FIXADDR_START_NR,
+ #endif
++      FIXADDR_START_NR,
++      END_OF_SPACE_NR,
+ };
+ 
+-/* Address space markers hints */
+ static struct addr_marker address_markers[] = {
+-      { 0, "User Space" },
+-#ifdef CONFIG_X86_64
+-      { 0x8000000000000000UL, "Kernel Space" },
+-      { 0/* PAGE_OFFSET */,   "Low Kernel Mapping" },
+-      { 0/* VMALLOC_START */, "vmalloc() Area" },
+-      { 0/* VMEMMAP_START */, "Vmemmap" },
+-#ifdef CONFIG_KASAN
+-      { KASAN_SHADOW_START,   "KASAN shadow" },
+-      { KASAN_SHADOW_END,     "KASAN shadow end" },
++      [USER_SPACE_NR]         = { 0,                  "User Space" },
++      [KERNEL_SPACE_NR]       = { PAGE_OFFSET,        "Kernel Mapping" },
++      [VMALLOC_START_NR]      = { 0UL,                "vmalloc() Area" },
++      [VMALLOC_END_NR]        = { 0UL,                "vmalloc() End" },
++#ifdef CONFIG_HIGHMEM
++      [PKMAP_BASE_NR]         = { 0UL,                "Persistent kmap() Area" },
+ #endif
+-# ifdef CONFIG_X86_ESPFIX64
+-      { ESPFIX_BASE_ADDR,     "ESPfix Area", 16 },
+-# endif
+-# ifdef CONFIG_EFI
+-      { EFI_VA_END,           "EFI Runtime Services" },
+-# endif
+-      { __START_KERNEL_map,   "High Kernel Mapping" },
+-      { MODULES_VADDR,        "Modules" },
+-      { MODULES_END,          "End Modules" },
+-#else
+-      { PAGE_OFFSET,          "Kernel Mapping" },
+-      { 0/* VMALLOC_START */, "vmalloc() Area" },
+-      { 0/*VMALLOC_END*/,     "vmalloc() End" },
+-# ifdef CONFIG_HIGHMEM
+-      { 0/*PKMAP_BASE*/,      "Persistent kmap() Area" },
+-# endif
+-      { 0/*FIXADDR_START*/,   "Fixmap Area" },
+-#endif
+-      { -1, NULL }            /* End of list */
++      [FIXADDR_START_NR]      = { 0UL,                "Fixmap area" },
++      [END_OF_SPACE_NR]       = { -1,                 NULL }
+ };
+ 
++#endif /* !CONFIG_X86_64 */
++
+ /* Multipliers for offsets within the PTEs */
+ #define PTE_LEVEL_MULT (PAGE_SIZE)
+ #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0168-x86-vsyscall-64-Explicitly-set-_PAGE_USER-in-the-pag.patch b/patches/kernel/0168-x86-vsyscall-64-Explicitly-set-_PAGE_USER-in-the-pag.patch

deleted file mode 100644 (file)

index dd32f28..0000000
--- a/patches/kernel/0168-x86-vsyscall-64-Explicitly-set-_PAGE_USER-in-the-pag.patch
+++ /dev/null
@@ -1,108 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sun, 10 Dec 2017 22:47:19 -0800
-Subject: [PATCH] x86/vsyscall/64: Explicitly set _PAGE_USER in the pagetable
- hierarchy
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The kernel is very erratic as to which pagetables have _PAGE_USER set.  The
-vsyscall page gets lucky: it seems that all of the relevant pagetables are
-among the apparently arbitrary ones that set _PAGE_USER.  Rather than
-relying on chance, just explicitly set _PAGE_USER.
-
-This will let us clean up pagetable setup to stop setting _PAGE_USER.  The
-added code can also be reused by pagetable isolation to manage the
-_PAGE_USER bit in the usermode tables.
-
-[ tglx: Folded paravirt fix from Juergen Gross ]
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Kees Cook <keescook@chromium.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 49275fef986abfb8b476e4708aaecc07e7d3e087)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 445742d3632efea229c0b974f91e56a19cf31996)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/vsyscall/vsyscall_64.c | 34 +++++++++++++++++++++++++++++++++-
- 1 file changed, 33 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
-index ce1d7534fa53..91f3133cf5f1 100644
---- a/arch/x86/entry/vsyscall/vsyscall_64.c
-+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
-@@ -36,6 +36,7 @@
- #include <asm/unistd.h>
- #include <asm/fixmap.h>
- #include <asm/traps.h>
-+#include <asm/paravirt.h>
- 
- #define CREATE_TRACE_POINTS
- #include "vsyscall_trace.h"
-@@ -328,16 +329,47 @@ int in_gate_area_no_mm(unsigned long addr)
-       return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
- }
- 
-+/*
-+ * The VSYSCALL page is the only user-accessible page in the kernel address
-+ * range.  Normally, the kernel page tables can have _PAGE_USER clear, but
-+ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
-+ * are enabled.
-+ *
-+ * Some day we may create a "minimal" vsyscall mode in which we emulate
-+ * vsyscalls but leave the page not present.  If so, we skip calling
-+ * this.
-+ */
-+static void __init set_vsyscall_pgtable_user_bits(void)
-+{
-+      pgd_t *pgd;
-+      p4d_t *p4d;
-+      pud_t *pud;
-+      pmd_t *pmd;
-+
-+      pgd = pgd_offset_k(VSYSCALL_ADDR);
-+      set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
-+      p4d = p4d_offset(pgd, VSYSCALL_ADDR);
-+#if CONFIG_PGTABLE_LEVELS >= 5
-+      p4d->p4d |= _PAGE_USER;
-+#endif
-+      pud = pud_offset(p4d, VSYSCALL_ADDR);
-+      set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
-+      pmd = pmd_offset(pud, VSYSCALL_ADDR);
-+      set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
-+}
-+
- void __init map_vsyscall(void)
- {
-       extern char __vsyscall_page;
-       unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
- 
--      if (vsyscall_mode != NONE)
-+      if (vsyscall_mode != NONE) {
-               __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
-                            vsyscall_mode == NATIVE
-                            ? PAGE_KERNEL_VSYSCALL
-                            : PAGE_KERNEL_VVAR);
-+              set_vsyscall_pgtable_user_bits();
-+      }
- 
-       BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
-                    (unsigned long)VSYSCALL_ADDR);
--- 
-2.14.2
-
diff --git a/patches/kernel/0169-x86-vsyscall-64-Explicitly-set-_PAGE_USER-in-the-pag.patch b/patches/kernel/0169-x86-vsyscall-64-Explicitly-set-_PAGE_USER-in-the-pag.patch

new file mode 100644 (file)

index 0000000..dd32f28
--- /dev/null
+++ b/patches/kernel/0169-x86-vsyscall-64-Explicitly-set-_PAGE_USER-in-the-pag.patch
@@ -0,0 +1,108 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 10 Dec 2017 22:47:19 -0800
+Subject: [PATCH] x86/vsyscall/64: Explicitly set _PAGE_USER in the pagetable
+ hierarchy
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The kernel is very erratic as to which pagetables have _PAGE_USER set.  The
+vsyscall page gets lucky: it seems that all of the relevant pagetables are
+among the apparently arbitrary ones that set _PAGE_USER.  Rather than
+relying on chance, just explicitly set _PAGE_USER.
+
+This will let us clean up pagetable setup to stop setting _PAGE_USER.  The
+added code can also be reused by pagetable isolation to manage the
+_PAGE_USER bit in the usermode tables.
+
+[ tglx: Folded paravirt fix from Juergen Gross ]
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 49275fef986abfb8b476e4708aaecc07e7d3e087)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 445742d3632efea229c0b974f91e56a19cf31996)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/vsyscall/vsyscall_64.c | 34 +++++++++++++++++++++++++++++++++-
+ 1 file changed, 33 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
+index ce1d7534fa53..91f3133cf5f1 100644
+--- a/arch/x86/entry/vsyscall/vsyscall_64.c
++++ b/arch/x86/entry/vsyscall/vsyscall_64.c
+@@ -36,6 +36,7 @@
+ #include <asm/unistd.h>
+ #include <asm/fixmap.h>
+ #include <asm/traps.h>
++#include <asm/paravirt.h>
+ 
+ #define CREATE_TRACE_POINTS
+ #include "vsyscall_trace.h"
+@@ -328,16 +329,47 @@ int in_gate_area_no_mm(unsigned long addr)
+       return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
+ }
+ 
++/*
++ * The VSYSCALL page is the only user-accessible page in the kernel address
++ * range.  Normally, the kernel page tables can have _PAGE_USER clear, but
++ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
++ * are enabled.
++ *
++ * Some day we may create a "minimal" vsyscall mode in which we emulate
++ * vsyscalls but leave the page not present.  If so, we skip calling
++ * this.
++ */
++static void __init set_vsyscall_pgtable_user_bits(void)
++{
++      pgd_t *pgd;
++      p4d_t *p4d;
++      pud_t *pud;
++      pmd_t *pmd;
++
++      pgd = pgd_offset_k(VSYSCALL_ADDR);
++      set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
++      p4d = p4d_offset(pgd, VSYSCALL_ADDR);
++#if CONFIG_PGTABLE_LEVELS >= 5
++      p4d->p4d |= _PAGE_USER;
++#endif
++      pud = pud_offset(p4d, VSYSCALL_ADDR);
++      set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
++      pmd = pmd_offset(pud, VSYSCALL_ADDR);
++      set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
++}
++
+ void __init map_vsyscall(void)
+ {
+       extern char __vsyscall_page;
+       unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
+ 
+-      if (vsyscall_mode != NONE)
++      if (vsyscall_mode != NONE) {
+               __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
+                            vsyscall_mode == NATIVE
+                            ? PAGE_KERNEL_VSYSCALL
+                            : PAGE_KERNEL_VVAR);
++              set_vsyscall_pgtable_user_bits();
++      }
+ 
+       BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
+                    (unsigned long)VSYSCALL_ADDR);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0169-x86-vsyscall-64-Warn-and-fail-vsyscall-emulation-in-.patch b/patches/kernel/0169-x86-vsyscall-64-Warn-and-fail-vsyscall-emulation-in-.patch

deleted file mode 100644 (file)

index f1c7be9..0000000
--- a/patches/kernel/0169-x86-vsyscall-64-Warn-and-fail-vsyscall-emulation-in-.patch
+++ /dev/null
@@ -1,55 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Sun, 10 Dec 2017 22:47:20 -0800
-Subject: [PATCH] x86/vsyscall/64: Warn and fail vsyscall emulation in NATIVE
- mode
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-If something goes wrong with pagetable setup, vsyscall=native will
-accidentally fall back to emulation.  Make it warn and fail so that we
-notice.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Kees Cook <keescook@chromium.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 4831b779403a836158917d59a7ca880483c67378)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit ba10c7488b12c3106d79c8b2ba3f4e79c7e40ee4)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/vsyscall/vsyscall_64.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
-index 91f3133cf5f1..5e56a4ced848 100644
---- a/arch/x86/entry/vsyscall/vsyscall_64.c
-+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
-@@ -138,6 +138,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
- 
-       WARN_ON_ONCE(address != regs->ip);
- 
-+      /* This should be unreachable in NATIVE mode. */
-+      if (WARN_ON(vsyscall_mode == NATIVE))
-+              return false;
-+
-       if (vsyscall_mode == NONE) {
-               warn_bad_vsyscall(KERN_INFO, regs,
-                                 "vsyscall attempted with vsyscall=none");
--- 
-2.14.2
-
diff --git a/patches/kernel/0170-arch-mm-Allow-arch_dup_mmap-to-fail.patch b/patches/kernel/0170-arch-mm-Allow-arch_dup_mmap-to-fail.patch

deleted file mode 100644 (file)

index d7db473..0000000
--- a/patches/kernel/0170-arch-mm-Allow-arch_dup_mmap-to-fail.patch
+++ /dev/null
@@ -1,155 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Thu, 14 Dec 2017 12:27:29 +0100
-Subject: [PATCH] arch, mm: Allow arch_dup_mmap() to fail
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-In order to sanitize the LDT initialization on x86 arch_dup_mmap() must be
-allowed to fail. Fix up all instances.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Andy Lutomirsky <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: dan.j.williams@intel.com
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: kirill.shutemov@linux.intel.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit c10e83f598d08046dd1ebc8360d4bb12d802d51b)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit b812abb61437eda1f5718a95085d67902f813f2f)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/powerpc/include/asm/mmu_context.h   | 5 +++--
- arch/um/include/asm/mmu_context.h        | 3 ++-
- arch/unicore32/include/asm/mmu_context.h | 5 +++--
- arch/x86/include/asm/mmu_context.h       | 4 ++--
- include/asm-generic/mm_hooks.h           | 5 +++--
- kernel/fork.c                            | 3 +--
- 6 files changed, 14 insertions(+), 11 deletions(-)
-
-diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
-index 35bec1c5bd5a..60afcc94e673 100644
---- a/arch/powerpc/include/asm/mmu_context.h
-+++ b/arch/powerpc/include/asm/mmu_context.h
-@@ -185,9 +185,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
- #endif
- }
- 
--static inline void arch_dup_mmap(struct mm_struct *oldmm,
--                               struct mm_struct *mm)
-+static inline int arch_dup_mmap(struct mm_struct *oldmm,
-+                              struct mm_struct *mm)
- {
-+      return 0;
- }
- 
- static inline void arch_exit_mmap(struct mm_struct *mm)
-diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
-index b668e351fd6c..fca34b2177e2 100644
---- a/arch/um/include/asm/mmu_context.h
-+++ b/arch/um/include/asm/mmu_context.h
-@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
- /*
-  * Needed since we do not use the asm-generic/mm_hooks.h:
-  */
--static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
-+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
- {
-       uml_setup_stubs(mm);
-+      return 0;
- }
- extern void arch_exit_mmap(struct mm_struct *mm);
- static inline void arch_unmap(struct mm_struct *mm,
-diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
-index 59b06b48f27d..5c205a9cb5a6 100644
---- a/arch/unicore32/include/asm/mmu_context.h
-+++ b/arch/unicore32/include/asm/mmu_context.h
-@@ -81,9 +81,10 @@ do { \
-       } \
- } while (0)
- 
--static inline void arch_dup_mmap(struct mm_struct *oldmm,
--                               struct mm_struct *mm)
-+static inline int arch_dup_mmap(struct mm_struct *oldmm,
-+                              struct mm_struct *mm)
- {
-+      return 0;
- }
- 
- static inline void arch_unmap(struct mm_struct *mm,
-diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
-index efc530642f7d..9be54d9c04c4 100644
---- a/arch/x86/include/asm/mmu_context.h
-+++ b/arch/x86/include/asm/mmu_context.h
-@@ -175,10 +175,10 @@ do {                                             \
- } while (0)
- #endif
- 
--static inline void arch_dup_mmap(struct mm_struct *oldmm,
--                               struct mm_struct *mm)
-+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
- {
-       paravirt_arch_dup_mmap(oldmm, mm);
-+      return 0;
- }
- 
- static inline void arch_exit_mmap(struct mm_struct *mm)
-diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h
-index 41e5b6784b97..7a2980f4e3e6 100644
---- a/include/asm-generic/mm_hooks.h
-+++ b/include/asm-generic/mm_hooks.h
-@@ -6,9 +6,10 @@
- #ifndef _ASM_GENERIC_MM_HOOKS_H
- #define _ASM_GENERIC_MM_HOOKS_H
- 
--static inline void arch_dup_mmap(struct mm_struct *oldmm,
--                               struct mm_struct *mm)
-+static inline int arch_dup_mmap(struct mm_struct *oldmm,
-+                              struct mm_struct *mm)
- {
-+      return 0;
- }
- 
- static inline void arch_exit_mmap(struct mm_struct *mm)
-diff --git a/kernel/fork.c b/kernel/fork.c
-index 8efc6b4466e3..1d907772b9d2 100644
---- a/kernel/fork.c
-+++ b/kernel/fork.c
-@@ -712,8 +712,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
-                       goto out;
-       }
-       /* a new mm has just been created */
--      arch_dup_mmap(oldmm, mm);
--      retval = 0;
-+      retval = arch_dup_mmap(oldmm, mm);
- out:
-       up_write(&mm->mmap_sem);
-       flush_tlb_mm(oldmm);
--- 
-2.14.2
-
diff --git a/patches/kernel/0170-x86-vsyscall-64-Warn-and-fail-vsyscall-emulation-in-.patch b/patches/kernel/0170-x86-vsyscall-64-Warn-and-fail-vsyscall-emulation-in-.patch

new file mode 100644 (file)

index 0000000..f1c7be9
--- /dev/null
+++ b/patches/kernel/0170-x86-vsyscall-64-Warn-and-fail-vsyscall-emulation-in-.patch
@@ -0,0 +1,55 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 10 Dec 2017 22:47:20 -0800
+Subject: [PATCH] x86/vsyscall/64: Warn and fail vsyscall emulation in NATIVE
+ mode
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+If something goes wrong with pagetable setup, vsyscall=native will
+accidentally fall back to emulation.  Make it warn and fail so that we
+notice.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 4831b779403a836158917d59a7ca880483c67378)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit ba10c7488b12c3106d79c8b2ba3f4e79c7e40ee4)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/vsyscall/vsyscall_64.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
+index 91f3133cf5f1..5e56a4ced848 100644
+--- a/arch/x86/entry/vsyscall/vsyscall_64.c
++++ b/arch/x86/entry/vsyscall/vsyscall_64.c
+@@ -138,6 +138,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+ 
+       WARN_ON_ONCE(address != regs->ip);
+ 
++      /* This should be unreachable in NATIVE mode. */
++      if (WARN_ON(vsyscall_mode == NATIVE))
++              return false;
++
+       if (vsyscall_mode == NONE) {
+               warn_bad_vsyscall(KERN_INFO, regs,
+                                 "vsyscall attempted with vsyscall=none");
+-- 
+2.14.2
+
diff --git a/patches/kernel/0171-arch-mm-Allow-arch_dup_mmap-to-fail.patch b/patches/kernel/0171-arch-mm-Allow-arch_dup_mmap-to-fail.patch

new file mode 100644 (file)

index 0000000..d7db473
--- /dev/null
+++ b/patches/kernel/0171-arch-mm-Allow-arch_dup_mmap-to-fail.patch
@@ -0,0 +1,155 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 14 Dec 2017 12:27:29 +0100
+Subject: [PATCH] arch, mm: Allow arch_dup_mmap() to fail
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+In order to sanitize the LDT initialization on x86 arch_dup_mmap() must be
+allowed to fail. Fix up all instances.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Andy Lutomirsky <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: dan.j.williams@intel.com
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: kirill.shutemov@linux.intel.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit c10e83f598d08046dd1ebc8360d4bb12d802d51b)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit b812abb61437eda1f5718a95085d67902f813f2f)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/powerpc/include/asm/mmu_context.h   | 5 +++--
+ arch/um/include/asm/mmu_context.h        | 3 ++-
+ arch/unicore32/include/asm/mmu_context.h | 5 +++--
+ arch/x86/include/asm/mmu_context.h       | 4 ++--
+ include/asm-generic/mm_hooks.h           | 5 +++--
+ kernel/fork.c                            | 3 +--
+ 6 files changed, 14 insertions(+), 11 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
+index 35bec1c5bd5a..60afcc94e673 100644
+--- a/arch/powerpc/include/asm/mmu_context.h
++++ b/arch/powerpc/include/asm/mmu_context.h
+@@ -185,9 +185,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
+ #endif
+ }
+ 
+-static inline void arch_dup_mmap(struct mm_struct *oldmm,
+-                               struct mm_struct *mm)
++static inline int arch_dup_mmap(struct mm_struct *oldmm,
++                              struct mm_struct *mm)
+ {
++      return 0;
+ }
+ 
+ static inline void arch_exit_mmap(struct mm_struct *mm)
+diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
+index b668e351fd6c..fca34b2177e2 100644
+--- a/arch/um/include/asm/mmu_context.h
++++ b/arch/um/include/asm/mmu_context.h
+@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
+ /*
+  * Needed since we do not use the asm-generic/mm_hooks.h:
+  */
+-static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
++static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+ {
+       uml_setup_stubs(mm);
++      return 0;
+ }
+ extern void arch_exit_mmap(struct mm_struct *mm);
+ static inline void arch_unmap(struct mm_struct *mm,
+diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
+index 59b06b48f27d..5c205a9cb5a6 100644
+--- a/arch/unicore32/include/asm/mmu_context.h
++++ b/arch/unicore32/include/asm/mmu_context.h
+@@ -81,9 +81,10 @@ do { \
+       } \
+ } while (0)
+ 
+-static inline void arch_dup_mmap(struct mm_struct *oldmm,
+-                               struct mm_struct *mm)
++static inline int arch_dup_mmap(struct mm_struct *oldmm,
++                              struct mm_struct *mm)
+ {
++      return 0;
+ }
+ 
+ static inline void arch_unmap(struct mm_struct *mm,
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index efc530642f7d..9be54d9c04c4 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -175,10 +175,10 @@ do {                                             \
+ } while (0)
+ #endif
+ 
+-static inline void arch_dup_mmap(struct mm_struct *oldmm,
+-                               struct mm_struct *mm)
++static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+ {
+       paravirt_arch_dup_mmap(oldmm, mm);
++      return 0;
+ }
+ 
+ static inline void arch_exit_mmap(struct mm_struct *mm)
+diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h
+index 41e5b6784b97..7a2980f4e3e6 100644
+--- a/include/asm-generic/mm_hooks.h
++++ b/include/asm-generic/mm_hooks.h
+@@ -6,9 +6,10 @@
+ #ifndef _ASM_GENERIC_MM_HOOKS_H
+ #define _ASM_GENERIC_MM_HOOKS_H
+ 
+-static inline void arch_dup_mmap(struct mm_struct *oldmm,
+-                               struct mm_struct *mm)
++static inline int arch_dup_mmap(struct mm_struct *oldmm,
++                              struct mm_struct *mm)
+ {
++      return 0;
+ }
+ 
+ static inline void arch_exit_mmap(struct mm_struct *mm)
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 8efc6b4466e3..1d907772b9d2 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -712,8 +712,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
+                       goto out;
+       }
+       /* a new mm has just been created */
+-      arch_dup_mmap(oldmm, mm);
+-      retval = 0;
++      retval = arch_dup_mmap(oldmm, mm);
+ out:
+       up_write(&mm->mmap_sem);
+       flush_tlb_mm(oldmm);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0171-x86-ldt-Rework-locking.patch b/patches/kernel/0171-x86-ldt-Rework-locking.patch

deleted file mode 100644 (file)

index a8c17a4..0000000
--- a/patches/kernel/0171-x86-ldt-Rework-locking.patch
+++ /dev/null
@@ -1,199 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Thu, 14 Dec 2017 12:27:30 +0100
-Subject: [PATCH] x86/ldt: Rework locking
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The LDT is duplicated on fork() and on exec(), which is wrong as exec()
-should start from a clean state, i.e. without LDT. To fix this the LDT
-duplication code will be moved into arch_dup_mmap() which is only called
-for fork().
-
-This introduces a locking problem. arch_dup_mmap() holds mmap_sem of the
-parent process, but the LDT duplication code needs to acquire
-mm->context.lock to access the LDT data safely, which is the reverse lock
-order of write_ldt() where mmap_sem nests into context.lock.
-
-Solve this by introducing a new rw semaphore which serializes the
-read/write_ldt() syscall operations and use context.lock to protect the
-actual installment of the LDT descriptor.
-
-So context.lock stabilizes mm->context.ldt and can nest inside of the new
-semaphore or mmap_sem.
-
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Andy Lutomirsky <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: dan.j.williams@intel.com
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: kirill.shutemov@linux.intel.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit c2b3496bb30bd159e9de42e5c952e1f1f33c9a77)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit bf7ee649ccc71ef9acb713a00472886c19e78684)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/mmu.h         |  4 +++-
- arch/x86/include/asm/mmu_context.h |  2 ++
- arch/x86/kernel/ldt.c              | 33 +++++++++++++++++++++------------
- 3 files changed, 26 insertions(+), 13 deletions(-)
-
-diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
-index bb8c597c2248..2d7e852b2dad 100644
---- a/arch/x86/include/asm/mmu.h
-+++ b/arch/x86/include/asm/mmu.h
-@@ -2,6 +2,7 @@
- #define _ASM_X86_MMU_H
- 
- #include <linux/spinlock.h>
-+#include <linux/rwsem.h>
- #include <linux/mutex.h>
- #include <linux/atomic.h>
- 
-@@ -26,7 +27,8 @@ typedef struct {
-       atomic64_t tlb_gen;
- 
- #ifdef CONFIG_MODIFY_LDT_SYSCALL
--      struct ldt_struct *ldt;
-+      struct rw_semaphore     ldt_usr_sem;
-+      struct ldt_struct       *ldt;
- #endif
- 
- #ifdef CONFIG_X86_64
-diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
-index 9be54d9c04c4..dd865c2acb9d 100644
---- a/arch/x86/include/asm/mmu_context.h
-+++ b/arch/x86/include/asm/mmu_context.h
-@@ -131,6 +131,8 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
- static inline int init_new_context(struct task_struct *tsk,
-                                  struct mm_struct *mm)
- {
-+      mutex_init(&mm->context.lock);
-+
-       mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
-       atomic64_set(&mm->context.tlb_gen, 0);
- 
-diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
-index b8be2413cb74..3e7208f0c350 100644
---- a/arch/x86/kernel/ldt.c
-+++ b/arch/x86/kernel/ldt.c
-@@ -4,6 +4,11 @@
-  * Copyright (C) 2002 Andi Kleen
-  *
-  * This handles calls from both 32bit and 64bit mode.
-+ *
-+ * Lock order:
-+ *    contex.ldt_usr_sem
-+ *      mmap_sem
-+ *        context.lock
-  */
- 
- #include <linux/errno.h>
-@@ -41,7 +46,7 @@ static void refresh_ldt_segments(void)
- #endif
- }
- 
--/* context.lock is held for us, so we don't need any locking. */
-+/* context.lock is held by the task which issued the smp function call */
- static void flush_ldt(void *__mm)
- {
-       struct mm_struct *mm = __mm;
-@@ -98,15 +103,17 @@ static void finalize_ldt_struct(struct ldt_struct *ldt)
-       paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
- }
- 
--/* context.lock is held */
--static void install_ldt(struct mm_struct *current_mm,
--                      struct ldt_struct *ldt)
-+static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
- {
-+      mutex_lock(&mm->context.lock);
-+
-       /* Synchronizes with READ_ONCE in load_mm_ldt. */
--      smp_store_release(&current_mm->context.ldt, ldt);
-+      smp_store_release(&mm->context.ldt, ldt);
- 
--      /* Activate the LDT for all CPUs using current_mm. */
--      on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
-+      /* Activate the LDT for all CPUs using currents mm. */
-+      on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
-+
-+      mutex_unlock(&mm->context.lock);
- }
- 
- static void free_ldt_struct(struct ldt_struct *ldt)
-@@ -132,7 +139,8 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
-       struct mm_struct *old_mm;
-       int retval = 0;
- 
--      mutex_init(&mm->context.lock);
-+      init_rwsem(&mm->context.ldt_usr_sem);
-+
-       old_mm = current->mm;
-       if (!old_mm) {
-               mm->context.ldt = NULL;
-@@ -179,7 +187,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
-       unsigned long entries_size;
-       int retval;
- 
--      mutex_lock(&mm->context.lock);
-+      down_read(&mm->context.ldt_usr_sem);
- 
-       if (!mm->context.ldt) {
-               retval = 0;
-@@ -208,7 +216,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
-       retval = bytecount;
- 
- out_unlock:
--      mutex_unlock(&mm->context.lock);
-+      up_read(&mm->context.ldt_usr_sem);
-       return retval;
- }
- 
-@@ -268,7 +276,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
-                       ldt.avl = 0;
-       }
- 
--      mutex_lock(&mm->context.lock);
-+      if (down_write_killable(&mm->context.ldt_usr_sem))
-+              return -EINTR;
- 
-       old_ldt       = mm->context.ldt;
-       old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
-@@ -290,7 +299,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
-       error = 0;
- 
- out_unlock:
--      mutex_unlock(&mm->context.lock);
-+      up_write(&mm->context.ldt_usr_sem);
- out:
-       return error;
- }
--- 
-2.14.2
-
diff --git a/patches/kernel/0172-x86-ldt-Prevent-LDT-inheritance-on-exec.patch b/patches/kernel/0172-x86-ldt-Prevent-LDT-inheritance-on-exec.patch

deleted file mode 100644 (file)

index 4348215..0000000
--- a/patches/kernel/0172-x86-ldt-Prevent-LDT-inheritance-on-exec.patch
+++ /dev/null
@@ -1,177 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Thu, 14 Dec 2017 12:27:31 +0100
-Subject: [PATCH] x86/ldt: Prevent LDT inheritance on exec
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The LDT is inherited across fork() or exec(), but that makes no sense
-at all because exec() is supposed to start the process clean.
-
-The reason why this happens is that init_new_context_ldt() is called from
-init_new_context() which obviously needs to be called for both fork() and
-exec().
-
-It would be surprising if anything relies on that behaviour, so it seems to
-be safe to remove that misfeature.
-
-Split the context initialization into two parts. Clear the LDT pointer and
-initialize the mutex from the general context init and move the LDT
-duplication to arch_dup_mmap() which is only called on fork().
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Signed-off-by: Peter Zijlstra <peterz@infradead.org>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Andy Lutomirsky <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: dan.j.williams@intel.com
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: kirill.shutemov@linux.intel.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit a4828f81037f491b2cc986595e3a969a6eeb2fb5)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit f90d254204df4b336731f23bb5417226f51e8651)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/mmu_context.h    | 21 ++++++++++++++-------
- arch/x86/kernel/ldt.c                 | 18 +++++-------------
- tools/testing/selftests/x86/ldt_gdt.c |  9 +++------
- 3 files changed, 22 insertions(+), 26 deletions(-)
-
-diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
-index dd865c2acb9d..47ec51a821e8 100644
---- a/arch/x86/include/asm/mmu_context.h
-+++ b/arch/x86/include/asm/mmu_context.h
-@@ -56,11 +56,17 @@ struct ldt_struct {
- /*
-  * Used for LDT copy/destruction.
-  */
--int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
-+static inline void init_new_context_ldt(struct mm_struct *mm)
-+{
-+      mm->context.ldt = NULL;
-+      init_rwsem(&mm->context.ldt_usr_sem);
-+}
-+int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
- void destroy_context_ldt(struct mm_struct *mm);
- #else /* CONFIG_MODIFY_LDT_SYSCALL */
--static inline int init_new_context_ldt(struct task_struct *tsk,
--                                     struct mm_struct *mm)
-+static inline void init_new_context_ldt(struct mm_struct *mm) { }
-+static inline int ldt_dup_context(struct mm_struct *oldmm,
-+                                struct mm_struct *mm)
- {
-       return 0;
- }
-@@ -136,15 +142,16 @@ static inline int init_new_context(struct task_struct *tsk,
-       mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
-       atomic64_set(&mm->context.tlb_gen, 0);
- 
--      #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-       if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
-               /* pkey 0 is the default and always allocated */
-               mm->context.pkey_allocation_map = 0x1;
-               /* -1 means unallocated or invalid */
-               mm->context.execute_only_pkey = -1;
-       }
--      #endif
--      return init_new_context_ldt(tsk, mm);
-+#endif
-+      init_new_context_ldt(mm);
-+      return 0;
- }
- static inline void destroy_context(struct mm_struct *mm)
- {
-@@ -180,7 +187,7 @@ do {                                               \
- static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
- {
-       paravirt_arch_dup_mmap(oldmm, mm);
--      return 0;
-+      return ldt_dup_context(oldmm, mm);
- }
- 
- static inline void arch_exit_mmap(struct mm_struct *mm)
-diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
-index 3e7208f0c350..74a5aaf13f3c 100644
---- a/arch/x86/kernel/ldt.c
-+++ b/arch/x86/kernel/ldt.c
-@@ -130,28 +130,20 @@ static void free_ldt_struct(struct ldt_struct *ldt)
- }
- 
- /*
-- * we do not have to muck with descriptors here, that is
-- * done in switch_mm() as needed.
-+ * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
-+ * the new task is not running, so nothing can be installed.
-  */
--int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
-+int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
- {
-       struct ldt_struct *new_ldt;
--      struct mm_struct *old_mm;
-       int retval = 0;
- 
--      init_rwsem(&mm->context.ldt_usr_sem);
--
--      old_mm = current->mm;
--      if (!old_mm) {
--              mm->context.ldt = NULL;
-+      if (!old_mm)
-               return 0;
--      }
- 
-       mutex_lock(&old_mm->context.lock);
--      if (!old_mm->context.ldt) {
--              mm->context.ldt = NULL;
-+      if (!old_mm->context.ldt)
-               goto out_unlock;
--      }
- 
-       new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
-       if (!new_ldt) {
-diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
-index 8e290c9b2c3f..783e1a754b78 100644
---- a/tools/testing/selftests/x86/ldt_gdt.c
-+++ b/tools/testing/selftests/x86/ldt_gdt.c
-@@ -626,13 +626,10 @@ static void do_multicpu_tests(void)
- static int finish_exec_test(void)
- {
-       /*
--       * In a sensible world, this would be check_invalid_segment(0, 1);
--       * For better or for worse, though, the LDT is inherited across exec.
--       * We can probably change this safely, but for now we test it.
-+       * Older kernel versions did inherit the LDT on exec() which is
-+       * wrong because exec() starts from a clean state.
-        */
--      check_valid_segment(0, 1,
--                          AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
--                          42, true);
-+      check_invalid_segment(0, 1);
- 
-       return nerrs ? 1 : 0;
- }
--- 
-2.14.2
-
diff --git a/patches/kernel/0172-x86-ldt-Rework-locking.patch b/patches/kernel/0172-x86-ldt-Rework-locking.patch

new file mode 100644 (file)

index 0000000..a8c17a4
--- /dev/null
+++ b/patches/kernel/0172-x86-ldt-Rework-locking.patch
@@ -0,0 +1,199 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 14 Dec 2017 12:27:30 +0100
+Subject: [PATCH] x86/ldt: Rework locking
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The LDT is duplicated on fork() and on exec(), which is wrong as exec()
+should start from a clean state, i.e. without LDT. To fix this the LDT
+duplication code will be moved into arch_dup_mmap() which is only called
+for fork().
+
+This introduces a locking problem. arch_dup_mmap() holds mmap_sem of the
+parent process, but the LDT duplication code needs to acquire
+mm->context.lock to access the LDT data safely, which is the reverse lock
+order of write_ldt() where mmap_sem nests into context.lock.
+
+Solve this by introducing a new rw semaphore which serializes the
+read/write_ldt() syscall operations and use context.lock to protect the
+actual installment of the LDT descriptor.
+
+So context.lock stabilizes mm->context.ldt and can nest inside of the new
+semaphore or mmap_sem.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Andy Lutomirsky <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: dan.j.williams@intel.com
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: kirill.shutemov@linux.intel.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit c2b3496bb30bd159e9de42e5c952e1f1f33c9a77)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit bf7ee649ccc71ef9acb713a00472886c19e78684)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/mmu.h         |  4 +++-
+ arch/x86/include/asm/mmu_context.h |  2 ++
+ arch/x86/kernel/ldt.c              | 33 +++++++++++++++++++++------------
+ 3 files changed, 26 insertions(+), 13 deletions(-)
+
+diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
+index bb8c597c2248..2d7e852b2dad 100644
+--- a/arch/x86/include/asm/mmu.h
++++ b/arch/x86/include/asm/mmu.h
+@@ -2,6 +2,7 @@
+ #define _ASM_X86_MMU_H
+ 
+ #include <linux/spinlock.h>
++#include <linux/rwsem.h>
+ #include <linux/mutex.h>
+ #include <linux/atomic.h>
+ 
+@@ -26,7 +27,8 @@ typedef struct {
+       atomic64_t tlb_gen;
+ 
+ #ifdef CONFIG_MODIFY_LDT_SYSCALL
+-      struct ldt_struct *ldt;
++      struct rw_semaphore     ldt_usr_sem;
++      struct ldt_struct       *ldt;
+ #endif
+ 
+ #ifdef CONFIG_X86_64
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index 9be54d9c04c4..dd865c2acb9d 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -131,6 +131,8 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+ static inline int init_new_context(struct task_struct *tsk,
+                                  struct mm_struct *mm)
+ {
++      mutex_init(&mm->context.lock);
++
+       mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
+       atomic64_set(&mm->context.tlb_gen, 0);
+ 
+diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
+index b8be2413cb74..3e7208f0c350 100644
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -4,6 +4,11 @@
+  * Copyright (C) 2002 Andi Kleen
+  *
+  * This handles calls from both 32bit and 64bit mode.
++ *
++ * Lock order:
++ *    contex.ldt_usr_sem
++ *      mmap_sem
++ *        context.lock
+  */
+ 
+ #include <linux/errno.h>
+@@ -41,7 +46,7 @@ static void refresh_ldt_segments(void)
+ #endif
+ }
+ 
+-/* context.lock is held for us, so we don't need any locking. */
++/* context.lock is held by the task which issued the smp function call */
+ static void flush_ldt(void *__mm)
+ {
+       struct mm_struct *mm = __mm;
+@@ -98,15 +103,17 @@ static void finalize_ldt_struct(struct ldt_struct *ldt)
+       paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
+ }
+ 
+-/* context.lock is held */
+-static void install_ldt(struct mm_struct *current_mm,
+-                      struct ldt_struct *ldt)
++static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
+ {
++      mutex_lock(&mm->context.lock);
++
+       /* Synchronizes with READ_ONCE in load_mm_ldt. */
+-      smp_store_release(&current_mm->context.ldt, ldt);
++      smp_store_release(&mm->context.ldt, ldt);
+ 
+-      /* Activate the LDT for all CPUs using current_mm. */
+-      on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
++      /* Activate the LDT for all CPUs using currents mm. */
++      on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
++
++      mutex_unlock(&mm->context.lock);
+ }
+ 
+ static void free_ldt_struct(struct ldt_struct *ldt)
+@@ -132,7 +139,8 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
+       struct mm_struct *old_mm;
+       int retval = 0;
+ 
+-      mutex_init(&mm->context.lock);
++      init_rwsem(&mm->context.ldt_usr_sem);
++
+       old_mm = current->mm;
+       if (!old_mm) {
+               mm->context.ldt = NULL;
+@@ -179,7 +187,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
+       unsigned long entries_size;
+       int retval;
+ 
+-      mutex_lock(&mm->context.lock);
++      down_read(&mm->context.ldt_usr_sem);
+ 
+       if (!mm->context.ldt) {
+               retval = 0;
+@@ -208,7 +216,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
+       retval = bytecount;
+ 
+ out_unlock:
+-      mutex_unlock(&mm->context.lock);
++      up_read(&mm->context.ldt_usr_sem);
+       return retval;
+ }
+ 
+@@ -268,7 +276,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
+                       ldt.avl = 0;
+       }
+ 
+-      mutex_lock(&mm->context.lock);
++      if (down_write_killable(&mm->context.ldt_usr_sem))
++              return -EINTR;
+ 
+       old_ldt       = mm->context.ldt;
+       old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
+@@ -290,7 +299,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
+       error = 0;
+ 
+ out_unlock:
+-      mutex_unlock(&mm->context.lock);
++      up_write(&mm->context.ldt_usr_sem);
+ out:
+       return error;
+ }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0173-x86-ldt-Prevent-LDT-inheritance-on-exec.patch b/patches/kernel/0173-x86-ldt-Prevent-LDT-inheritance-on-exec.patch

new file mode 100644 (file)

index 0000000..4348215
--- /dev/null
+++ b/patches/kernel/0173-x86-ldt-Prevent-LDT-inheritance-on-exec.patch
@@ -0,0 +1,177 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 14 Dec 2017 12:27:31 +0100
+Subject: [PATCH] x86/ldt: Prevent LDT inheritance on exec
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The LDT is inherited across fork() or exec(), but that makes no sense
+at all because exec() is supposed to start the process clean.
+
+The reason why this happens is that init_new_context_ldt() is called from
+init_new_context() which obviously needs to be called for both fork() and
+exec().
+
+It would be surprising if anything relies on that behaviour, so it seems to
+be safe to remove that misfeature.
+
+Split the context initialization into two parts. Clear the LDT pointer and
+initialize the mutex from the general context init and move the LDT
+duplication to arch_dup_mmap() which is only called on fork().
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Andy Lutomirsky <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: dan.j.williams@intel.com
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: kirill.shutemov@linux.intel.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit a4828f81037f491b2cc986595e3a969a6eeb2fb5)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit f90d254204df4b336731f23bb5417226f51e8651)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/mmu_context.h    | 21 ++++++++++++++-------
+ arch/x86/kernel/ldt.c                 | 18 +++++-------------
+ tools/testing/selftests/x86/ldt_gdt.c |  9 +++------
+ 3 files changed, 22 insertions(+), 26 deletions(-)
+
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index dd865c2acb9d..47ec51a821e8 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -56,11 +56,17 @@ struct ldt_struct {
+ /*
+  * Used for LDT copy/destruction.
+  */
+-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
++static inline void init_new_context_ldt(struct mm_struct *mm)
++{
++      mm->context.ldt = NULL;
++      init_rwsem(&mm->context.ldt_usr_sem);
++}
++int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
+ void destroy_context_ldt(struct mm_struct *mm);
+ #else /* CONFIG_MODIFY_LDT_SYSCALL */
+-static inline int init_new_context_ldt(struct task_struct *tsk,
+-                                     struct mm_struct *mm)
++static inline void init_new_context_ldt(struct mm_struct *mm) { }
++static inline int ldt_dup_context(struct mm_struct *oldmm,
++                                struct mm_struct *mm)
+ {
+       return 0;
+ }
+@@ -136,15 +142,16 @@ static inline int init_new_context(struct task_struct *tsk,
+       mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
+       atomic64_set(&mm->context.tlb_gen, 0);
+ 
+-      #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
++#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+       if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
+               /* pkey 0 is the default and always allocated */
+               mm->context.pkey_allocation_map = 0x1;
+               /* -1 means unallocated or invalid */
+               mm->context.execute_only_pkey = -1;
+       }
+-      #endif
+-      return init_new_context_ldt(tsk, mm);
++#endif
++      init_new_context_ldt(mm);
++      return 0;
+ }
+ static inline void destroy_context(struct mm_struct *mm)
+ {
+@@ -180,7 +187,7 @@ do {                                               \
+ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+ {
+       paravirt_arch_dup_mmap(oldmm, mm);
+-      return 0;
++      return ldt_dup_context(oldmm, mm);
+ }
+ 
+ static inline void arch_exit_mmap(struct mm_struct *mm)
+diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
+index 3e7208f0c350..74a5aaf13f3c 100644
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -130,28 +130,20 @@ static void free_ldt_struct(struct ldt_struct *ldt)
+ }
+ 
+ /*
+- * we do not have to muck with descriptors here, that is
+- * done in switch_mm() as needed.
++ * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
++ * the new task is not running, so nothing can be installed.
+  */
+-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
++int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
+ {
+       struct ldt_struct *new_ldt;
+-      struct mm_struct *old_mm;
+       int retval = 0;
+ 
+-      init_rwsem(&mm->context.ldt_usr_sem);
+-
+-      old_mm = current->mm;
+-      if (!old_mm) {
+-              mm->context.ldt = NULL;
++      if (!old_mm)
+               return 0;
+-      }
+ 
+       mutex_lock(&old_mm->context.lock);
+-      if (!old_mm->context.ldt) {
+-              mm->context.ldt = NULL;
++      if (!old_mm->context.ldt)
+               goto out_unlock;
+-      }
+ 
+       new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
+       if (!new_ldt) {
+diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
+index 8e290c9b2c3f..783e1a754b78 100644
+--- a/tools/testing/selftests/x86/ldt_gdt.c
++++ b/tools/testing/selftests/x86/ldt_gdt.c
+@@ -626,13 +626,10 @@ static void do_multicpu_tests(void)
+ static int finish_exec_test(void)
+ {
+       /*
+-       * In a sensible world, this would be check_invalid_segment(0, 1);
+-       * For better or for worse, though, the LDT is inherited across exec.
+-       * We can probably change this safely, but for now we test it.
++       * Older kernel versions did inherit the LDT on exec() which is
++       * wrong because exec() starts from a clean state.
+        */
+-      check_valid_segment(0, 1,
+-                          AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
+-                          42, true);
++      check_invalid_segment(0, 1);
+ 
+       return nerrs ? 1 : 0;
+ }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0173-x86-mm-64-Improve-the-memory-map-documentation.patch b/patches/kernel/0173-x86-mm-64-Improve-the-memory-map-documentation.patch

deleted file mode 100644 (file)

index 453153a..0000000
--- a/patches/kernel/0173-x86-mm-64-Improve-the-memory-map-documentation.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Tue, 12 Dec 2017 07:56:43 -0800
-Subject: [PATCH] x86/mm/64: Improve the memory map documentation
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The old docs had the vsyscall range wrong and were missing the fixmap.
-Fix both.
-
-There used to be 8 MB reserved for future vsyscalls, but that's long gone.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Kees Cook <keescook@chromium.org>
-Cc: Kirill A. Shutemov <kirill@shutemov.name>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 5a7ccf4754fb3660569a6de52ba7f7fc3dfaf280)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d694898656126d8a04e86f681c8fe34ea57f1b85)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/x86/x86_64/mm.txt | 10 ++++++----
- 1 file changed, 6 insertions(+), 4 deletions(-)
-
-diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
-index 3448e675b462..83ca5a3b90ac 100644
---- a/Documentation/x86/x86_64/mm.txt
-+++ b/Documentation/x86/x86_64/mm.txt
-@@ -19,8 +19,9 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
- ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
- ... unused hole ...
- ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
--ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
--ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
-+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space (variable)
-+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
-+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
- ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
- 
- Virtual memory map with 5 level page tables:
-@@ -41,8 +42,9 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
- ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
- ... unused hole ...
- ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
--ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
--ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
-+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space
-+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
-+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
- ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
- 
- Architecture defines a 64-bit virtual address. Implementations can support
--- 
-2.14.2
-
diff --git a/patches/kernel/0174-x86-doc-Remove-obvious-weirdnesses-from-the-x86-MM-l.patch b/patches/kernel/0174-x86-doc-Remove-obvious-weirdnesses-from-the-x86-MM-l.patch

deleted file mode 100644 (file)

index ddc6428..0000000
--- a/patches/kernel/0174-x86-doc-Remove-obvious-weirdnesses-from-the-x86-MM-l.patch
+++ /dev/null
@@ -1,85 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Tue, 5 Dec 2017 13:34:54 +0100
-Subject: [PATCH] x86/doc: Remove obvious weirdnesses from the x86 MM layout
- documentation
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit e8ffe96e5933d417195268478479933d56213a3f)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d9012133906878a404cf47acc168ff9e4b10e379)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/x86/x86_64/mm.txt | 12 +++---------
- 1 file changed, 3 insertions(+), 9 deletions(-)
-
-diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
-index 83ca5a3b90ac..63a41671d25b 100644
---- a/Documentation/x86/x86_64/mm.txt
-+++ b/Documentation/x86/x86_64/mm.txt
-@@ -1,6 +1,4 @@
- 
--<previous description obsolete, deleted>
--
- Virtual memory map with 4 level page tables:
- 
- 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
-@@ -49,8 +47,9 @@ ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
- 
- Architecture defines a 64-bit virtual address. Implementations can support
- less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
--through to the most-significant implemented bit are set to either all ones
--or all zero. This causes hole between user space and kernel addresses.
-+through to the most-significant implemented bit are sign extended.
-+This causes hole between user space and kernel addresses if you interpret them
-+as unsigned.
- 
- The direct mapping covers all memory in the system up to the highest
- memory address (this means in some cases it can also include PCI memory
-@@ -60,9 +59,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of
- the processes using the page fault handler, with init_top_pgt as
- reference.
- 
--Current X86-64 implementations support up to 46 bits of address space (64 TB),
--which is our current limit. This expands into MBZ space in the page tables.
--
- We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
- memory window (this size is arbitrary, it can be raised later if needed).
- The mappings are not part of any other kernel PGD and are only available
-@@ -74,5 +70,3 @@ following fixmap section.
- Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
- physical memory, vmalloc/ioremap space and virtual memory map are randomized.
- Their order is preserved but their base will be offset early at boot time.
--
---Andi Kleen, Jul 2004
--- 
-2.14.2
-
diff --git a/patches/kernel/0174-x86-mm-64-Improve-the-memory-map-documentation.patch b/patches/kernel/0174-x86-mm-64-Improve-the-memory-map-documentation.patch

new file mode 100644 (file)

index 0000000..453153a
--- /dev/null
+++ b/patches/kernel/0174-x86-mm-64-Improve-the-memory-map-documentation.patch
@@ -0,0 +1,70 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 12 Dec 2017 07:56:43 -0800
+Subject: [PATCH] x86/mm/64: Improve the memory map documentation
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The old docs had the vsyscall range wrong and were missing the fixmap.
+Fix both.
+
+There used to be 8 MB reserved for future vsyscalls, but that's long gone.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Kirill A. Shutemov <kirill@shutemov.name>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 5a7ccf4754fb3660569a6de52ba7f7fc3dfaf280)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d694898656126d8a04e86f681c8fe34ea57f1b85)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/x86/x86_64/mm.txt | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
+index 3448e675b462..83ca5a3b90ac 100644
+--- a/Documentation/x86/x86_64/mm.txt
++++ b/Documentation/x86/x86_64/mm.txt
+@@ -19,8 +19,9 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+ ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+ ... unused hole ...
+ ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
+-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
+-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
++ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space (variable)
++[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
++ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
+ ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+ 
+ Virtual memory map with 5 level page tables:
+@@ -41,8 +42,9 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+ ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+ ... unused hole ...
+ ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
+-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
+-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
++ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space
++[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
++ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
+ ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+ 
+ Architecture defines a 64-bit virtual address. Implementations can support
+-- 
+2.14.2
+
diff --git a/patches/kernel/0175-x86-doc-Remove-obvious-weirdnesses-from-the-x86-MM-l.patch b/patches/kernel/0175-x86-doc-Remove-obvious-weirdnesses-from-the-x86-MM-l.patch

new file mode 100644 (file)

index 0000000..ddc6428
--- /dev/null
+++ b/patches/kernel/0175-x86-doc-Remove-obvious-weirdnesses-from-the-x86-MM-l.patch
@@ -0,0 +1,85 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:54 +0100
+Subject: [PATCH] x86/doc: Remove obvious weirdnesses from the x86 MM layout
+ documentation
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit e8ffe96e5933d417195268478479933d56213a3f)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d9012133906878a404cf47acc168ff9e4b10e379)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/x86/x86_64/mm.txt | 12 +++---------
+ 1 file changed, 3 insertions(+), 9 deletions(-)
+
+diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
+index 83ca5a3b90ac..63a41671d25b 100644
+--- a/Documentation/x86/x86_64/mm.txt
++++ b/Documentation/x86/x86_64/mm.txt
+@@ -1,6 +1,4 @@
+ 
+-<previous description obsolete, deleted>
+-
+ Virtual memory map with 4 level page tables:
+ 
+ 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
+@@ -49,8 +47,9 @@ ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+ 
+ Architecture defines a 64-bit virtual address. Implementations can support
+ less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
+-through to the most-significant implemented bit are set to either all ones
+-or all zero. This causes hole between user space and kernel addresses.
++through to the most-significant implemented bit are sign extended.
++This causes hole between user space and kernel addresses if you interpret them
++as unsigned.
+ 
+ The direct mapping covers all memory in the system up to the highest
+ memory address (this means in some cases it can also include PCI memory
+@@ -60,9 +59,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of
+ the processes using the page fault handler, with init_top_pgt as
+ reference.
+ 
+-Current X86-64 implementations support up to 46 bits of address space (64 TB),
+-which is our current limit. This expands into MBZ space in the page tables.
+-
+ We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
+ memory window (this size is arbitrary, it can be raised later if needed).
+ The mappings are not part of any other kernel PGD and are only available
+@@ -74,5 +70,3 @@ following fixmap section.
+ Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
+ physical memory, vmalloc/ioremap space and virtual memory map are randomized.
+ Their order is preserved but their base will be offset early at boot time.
+-
+--Andi Kleen, Jul 2004
+-- 
+2.14.2
+
diff --git a/patches/kernel/0175-x86-entry-Rename-SYSENTER_stack-to-CPU_ENTRY_AREA_en.patch b/patches/kernel/0175-x86-entry-Rename-SYSENTER_stack-to-CPU_ENTRY_AREA_en.patch

deleted file mode 100644 (file)

index a48ffba..0000000
--- a/patches/kernel/0175-x86-entry-Rename-SYSENTER_stack-to-CPU_ENTRY_AREA_en.patch
+++ /dev/null
@@ -1,346 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Mon, 4 Dec 2017 17:25:07 -0800
-Subject: [PATCH] x86/entry: Rename SYSENTER_stack to
- CPU_ENTRY_AREA_entry_stack
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-If the kernel oopses while on the trampoline stack, it will print
-"<SYSENTER>" even if SYSENTER is not involved.  That is rather confusing.
-
-The "SYSENTER" stack is used for a lot more than SYSENTER now.  Give it a
-better string to display in stack dumps, and rename the kernel code to
-match.
-
-Also move the 32-bit code over to the new naming even though it still uses
-the entry stack only for SYSENTER.
-
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bp@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 4fe2d8b11a370af286287a2661de9d4e6c9a145a)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit e0437c473463f208c2b4952f0826e43ce1335a53)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/fixmap.h     |  8 ++++----
- arch/x86/include/asm/processor.h  |  6 +++---
- arch/x86/include/asm/stacktrace.h |  4 ++--
- arch/x86/kernel/asm-offsets.c     |  4 ++--
- arch/x86/kernel/asm-offsets_32.c  |  2 +-
- arch/x86/kernel/cpu/common.c      | 14 +++++++-------
- arch/x86/kernel/dumpstack.c       | 10 +++++-----
- arch/x86/kernel/dumpstack_32.c    |  6 +++---
- arch/x86/kernel/dumpstack_64.c    | 12 +++++++++---
- arch/x86/entry/entry_32.S         | 12 ++++++------
- arch/x86/entry/entry_64.S         |  4 ++--
- 11 files changed, 44 insertions(+), 38 deletions(-)
-
-diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
-index 5dc269ff4085..a7fb137ad964 100644
---- a/arch/x86/include/asm/fixmap.h
-+++ b/arch/x86/include/asm/fixmap.h
-@@ -56,10 +56,10 @@ struct cpu_entry_area {
-       char gdt[PAGE_SIZE];
- 
-       /*
--       * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as
-+       * The GDT is just below entry_stack and thus serves (on x86_64) as
-        * a a read-only guard page.
-        */
--      struct SYSENTER_stack_page SYSENTER_stack_page;
-+      struct entry_stack_page entry_stack_page;
- 
-       /*
-        * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
-@@ -230,9 +230,9 @@ static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
-       return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
- }
- 
--static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
-+static inline struct entry_stack *cpu_entry_stack(int cpu)
- {
--      return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack;
-+      return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
- }
- 
- #endif /* !__ASSEMBLY__ */
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index 59a317f8e0ec..935d68609922 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -330,12 +330,12 @@ struct x86_hw_tss {
- #define IO_BITMAP_OFFSET              (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
- #define INVALID_IO_BITMAP_OFFSET      0x8000
- 
--struct SYSENTER_stack {
-+struct entry_stack {
-       unsigned long           words[64];
- };
- 
--struct SYSENTER_stack_page {
--      struct SYSENTER_stack stack;
-+struct entry_stack_page {
-+      struct entry_stack stack;
- } __aligned(PAGE_SIZE);
- 
- struct tss_struct {
-diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
-index 95f999576131..3b3cc5ba579a 100644
---- a/arch/x86/include/asm/stacktrace.h
-+++ b/arch/x86/include/asm/stacktrace.h
-@@ -15,7 +15,7 @@ enum stack_type {
-       STACK_TYPE_TASK,
-       STACK_TYPE_IRQ,
-       STACK_TYPE_SOFTIRQ,
--      STACK_TYPE_SYSENTER,
-+      STACK_TYPE_ENTRY,
-       STACK_TYPE_EXCEPTION,
-       STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
- };
-@@ -28,7 +28,7 @@ struct stack_info {
- bool in_task_stack(unsigned long *stack, struct task_struct *task,
-                  struct stack_info *info);
- 
--bool in_sysenter_stack(unsigned long *stack, struct stack_info *info);
-+bool in_entry_stack(unsigned long *stack, struct stack_info *info);
- 
- int get_stack_info(unsigned long *stack, struct task_struct *task,
-                  struct stack_info *info, unsigned long *visit_mask);
-diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
-index 40c3fab107ac..25b4832e9c28 100644
---- a/arch/x86/kernel/asm-offsets.c
-+++ b/arch/x86/kernel/asm-offsets.c
-@@ -96,6 +96,6 @@ void common(void) {
-       /* Layout info for cpu_entry_area */
-       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
-       OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
--      OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page);
--      DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
-+      OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
-+      DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
- }
-diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
-index c4f23da7a0f0..4dba34cb777d 100644
---- a/arch/x86/kernel/asm-offsets_32.c
-+++ b/arch/x86/kernel/asm-offsets_32.c
-@@ -50,7 +50,7 @@ void foo(void)
- 
-       /* Offset from the sysenter stack to tss.sp0 */
-       DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
--             offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack));
-+             offsetofend(struct cpu_entry_area, entry_stack_page.stack));
- 
- #ifdef CONFIG_CC_STACKPROTECTOR
-       BLANK();
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index fcdba90e0890..7a8a5d436566 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -487,8 +487,8 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
- #endif
- 
--static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
--                                 SYSENTER_stack_storage);
-+static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page,
-+                                 entry_stack_storage);
- 
- static void __init
- set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
-@@ -523,8 +523,8 @@ static void __init setup_cpu_entry_area(int cpu)
- #endif
- 
-       __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
--      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
--                              per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
-+      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
-+                              per_cpu_ptr(&entry_stack_storage, cpu), 1,
-                               PAGE_KERNEL);
- 
-       /*
-@@ -1315,7 +1315,7 @@ void enable_sep_cpu(void)
- 
-       tss->x86_tss.ss1 = __KERNEL_CS;
-       wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
--      wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
-+      wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
-       wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
- 
-       put_cpu();
-@@ -1441,7 +1441,7 @@ void syscall_init(void)
-        * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
-        */
-       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
--      wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
-+      wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
-       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
- #else
-       wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
-@@ -1655,7 +1655,7 @@ void cpu_init(void)
-        */
-       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
-       load_TR_desc();
--      load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
-+      load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
- 
-       load_mm_ldt(&init_mm);
- 
-diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
-index b005e5ef6738..55bf1c3b5319 100644
---- a/arch/x86/kernel/dumpstack.c
-+++ b/arch/x86/kernel/dumpstack.c
-@@ -43,9 +43,9 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
-       return true;
- }
- 
--bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
-+bool in_entry_stack(unsigned long *stack, struct stack_info *info)
- {
--      struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());
-+      struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
- 
-       void *begin = ss;
-       void *end = ss + 1;
-@@ -53,7 +53,7 @@ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
-       if ((void *)stack < begin || (void *)stack >= end)
-               return false;
- 
--      info->type      = STACK_TYPE_SYSENTER;
-+      info->type      = STACK_TYPE_ENTRY;
-       info->begin     = begin;
-       info->end       = end;
-       info->next_sp   = NULL;
-@@ -111,13 +111,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-        * - task stack
-        * - interrupt stack
-        * - HW exception stacks (double fault, nmi, debug, mce)
--       * - SYSENTER stack
-+       * - entry stack
-        *
-        * x86-32 can have up to four stacks:
-        * - task stack
-        * - softirq stack
-        * - hardirq stack
--       * - SYSENTER stack
-+       * - entry stack
-        */
-       for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
-               const char *stack_name;
-diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
-index 3160bf2d100e..4580ba0204f6 100644
---- a/arch/x86/kernel/dumpstack_32.c
-+++ b/arch/x86/kernel/dumpstack_32.c
-@@ -25,8 +25,8 @@ const char *stack_type_name(enum stack_type type)
-       if (type == STACK_TYPE_SOFTIRQ)
-               return "SOFTIRQ";
- 
--      if (type == STACK_TYPE_SYSENTER)
--              return "SYSENTER";
-+      if (type == STACK_TYPE_ENTRY)
-+              return "ENTRY_TRAMPOLINE";
- 
-       return NULL;
- }
-@@ -95,7 +95,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
-       if (task != current)
-               goto unknown;
- 
--      if (in_sysenter_stack(stack, info))
-+      if (in_entry_stack(stack, info))
-               goto recursion_check;
- 
-       if (in_hardirq_stack(stack, info))
-diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
-index f5107b659f86..7d9c0e06afc2 100644
---- a/arch/x86/kernel/dumpstack_64.c
-+++ b/arch/x86/kernel/dumpstack_64.c
-@@ -36,8 +36,14 @@ const char *stack_type_name(enum stack_type type)
-       if (type == STACK_TYPE_IRQ)
-               return "IRQ";
- 
--      if (type == STACK_TYPE_SYSENTER)
--              return "SYSENTER";
-+      if (type == STACK_TYPE_ENTRY) {
-+              /*
-+               * On 64-bit, we have a generic entry stack that we
-+               * use for all the kernel entry points, including
-+               * SYSENTER.
-+               */
-+              return "ENTRY_TRAMPOLINE";
-+      }
- 
-       if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
-               return exception_stack_names[type - STACK_TYPE_EXCEPTION];
-@@ -117,7 +123,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
-       if (in_irq_stack(stack, info))
-               goto recursion_check;
- 
--      if (in_sysenter_stack(stack, info))
-+      if (in_entry_stack(stack, info))
-               goto recursion_check;
- 
-       goto unknown;
-diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
-index 3ef7800007f8..634c6a78885c 100644
---- a/arch/x86/entry/entry_32.S
-+++ b/arch/x86/entry/entry_32.S
-@@ -949,9 +949,9 @@ ENTRY(debug)
- 
-       /* Are we currently on the SYSENTER stack? */
-       movl    PER_CPU_VAR(cpu_entry_area), %ecx
--      addl    $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
--      subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
--      cmpl    $SIZEOF_SYSENTER_stack, %ecx
-+      addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
-+      subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
-+      cmpl    $SIZEOF_entry_stack, %ecx
-       jb      .Ldebug_from_sysenter_stack
- 
-       TRACE_IRQS_OFF
-@@ -993,9 +993,9 @@ ENTRY(nmi)
- 
-       /* Are we currently on the SYSENTER stack? */
-       movl    PER_CPU_VAR(cpu_entry_area), %ecx
--      addl    $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
--      subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
--      cmpl    $SIZEOF_SYSENTER_stack, %ecx
-+      addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
-+      subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
-+      cmpl    $SIZEOF_entry_stack, %ecx
-       jb      .Lnmi_from_sysenter_stack
- 
-       /* Not on SYSENTER stack. */
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 157860b3569f..03e052f02176 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -153,8 +153,8 @@ END(native_usergs_sysret64)
-       _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
- 
- /* The top word of the SYSENTER stack is hot and is usable as scratch space. */
--#define RSP_SCRATCH   CPU_ENTRY_AREA_SYSENTER_stack + \
--                      SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
-+#define RSP_SCRATCH   CPU_ENTRY_AREA_entry_stack + \
-+                      SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
- 
- ENTRY(entry_SYSCALL_64_trampoline)
-       UNWIND_HINT_EMPTY
--- 
-2.14.2
-
diff --git a/patches/kernel/0176-x86-entry-Rename-SYSENTER_stack-to-CPU_ENTRY_AREA_en.patch b/patches/kernel/0176-x86-entry-Rename-SYSENTER_stack-to-CPU_ENTRY_AREA_en.patch

new file mode 100644 (file)

index 0000000..a48ffba
--- /dev/null
+++ b/patches/kernel/0176-x86-entry-Rename-SYSENTER_stack-to-CPU_ENTRY_AREA_en.patch
@@ -0,0 +1,346 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 17:25:07 -0800
+Subject: [PATCH] x86/entry: Rename SYSENTER_stack to
+ CPU_ENTRY_AREA_entry_stack
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+If the kernel oopses while on the trampoline stack, it will print
+"<SYSENTER>" even if SYSENTER is not involved.  That is rather confusing.
+
+The "SYSENTER" stack is used for a lot more than SYSENTER now.  Give it a
+better string to display in stack dumps, and rename the kernel code to
+match.
+
+Also move the 32-bit code over to the new naming even though it still uses
+the entry stack only for SYSENTER.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 4fe2d8b11a370af286287a2661de9d4e6c9a145a)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit e0437c473463f208c2b4952f0826e43ce1335a53)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/fixmap.h     |  8 ++++----
+ arch/x86/include/asm/processor.h  |  6 +++---
+ arch/x86/include/asm/stacktrace.h |  4 ++--
+ arch/x86/kernel/asm-offsets.c     |  4 ++--
+ arch/x86/kernel/asm-offsets_32.c  |  2 +-
+ arch/x86/kernel/cpu/common.c      | 14 +++++++-------
+ arch/x86/kernel/dumpstack.c       | 10 +++++-----
+ arch/x86/kernel/dumpstack_32.c    |  6 +++---
+ arch/x86/kernel/dumpstack_64.c    | 12 +++++++++---
+ arch/x86/entry/entry_32.S         | 12 ++++++------
+ arch/x86/entry/entry_64.S         |  4 ++--
+ 11 files changed, 44 insertions(+), 38 deletions(-)
+
+diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
+index 5dc269ff4085..a7fb137ad964 100644
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -56,10 +56,10 @@ struct cpu_entry_area {
+       char gdt[PAGE_SIZE];
+ 
+       /*
+-       * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as
++       * The GDT is just below entry_stack and thus serves (on x86_64) as
+        * a a read-only guard page.
+        */
+-      struct SYSENTER_stack_page SYSENTER_stack_page;
++      struct entry_stack_page entry_stack_page;
+ 
+       /*
+        * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
+@@ -230,9 +230,9 @@ static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
+       return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
+ }
+ 
+-static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
++static inline struct entry_stack *cpu_entry_stack(int cpu)
+ {
+-      return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack;
++      return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+ }
+ 
+ #endif /* !__ASSEMBLY__ */
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 59a317f8e0ec..935d68609922 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -330,12 +330,12 @@ struct x86_hw_tss {
+ #define IO_BITMAP_OFFSET              (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
+ #define INVALID_IO_BITMAP_OFFSET      0x8000
+ 
+-struct SYSENTER_stack {
++struct entry_stack {
+       unsigned long           words[64];
+ };
+ 
+-struct SYSENTER_stack_page {
+-      struct SYSENTER_stack stack;
++struct entry_stack_page {
++      struct entry_stack stack;
+ } __aligned(PAGE_SIZE);
+ 
+ struct tss_struct {
+diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
+index 95f999576131..3b3cc5ba579a 100644
+--- a/arch/x86/include/asm/stacktrace.h
++++ b/arch/x86/include/asm/stacktrace.h
+@@ -15,7 +15,7 @@ enum stack_type {
+       STACK_TYPE_TASK,
+       STACK_TYPE_IRQ,
+       STACK_TYPE_SOFTIRQ,
+-      STACK_TYPE_SYSENTER,
++      STACK_TYPE_ENTRY,
+       STACK_TYPE_EXCEPTION,
+       STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
+ };
+@@ -28,7 +28,7 @@ struct stack_info {
+ bool in_task_stack(unsigned long *stack, struct task_struct *task,
+                  struct stack_info *info);
+ 
+-bool in_sysenter_stack(unsigned long *stack, struct stack_info *info);
++bool in_entry_stack(unsigned long *stack, struct stack_info *info);
+ 
+ int get_stack_info(unsigned long *stack, struct task_struct *task,
+                  struct stack_info *info, unsigned long *visit_mask);
+diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
+index 40c3fab107ac..25b4832e9c28 100644
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -96,6 +96,6 @@ void common(void) {
+       /* Layout info for cpu_entry_area */
+       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+       OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
+-      OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page);
+-      DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
++      OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
++      DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
+ }
+diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
+index c4f23da7a0f0..4dba34cb777d 100644
+--- a/arch/x86/kernel/asm-offsets_32.c
++++ b/arch/x86/kernel/asm-offsets_32.c
+@@ -50,7 +50,7 @@ void foo(void)
+ 
+       /* Offset from the sysenter stack to tss.sp0 */
+       DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
+-             offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack));
++             offsetofend(struct cpu_entry_area, entry_stack_page.stack));
+ 
+ #ifdef CONFIG_CC_STACKPROTECTOR
+       BLANK();
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index fcdba90e0890..7a8a5d436566 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -487,8 +487,8 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+ #endif
+ 
+-static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
+-                                 SYSENTER_stack_storage);
++static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page,
++                                 entry_stack_storage);
+ 
+ static void __init
+ set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
+@@ -523,8 +523,8 @@ static void __init setup_cpu_entry_area(int cpu)
+ #endif
+ 
+       __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+-      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
+-                              per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
++      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
++                              per_cpu_ptr(&entry_stack_storage, cpu), 1,
+                               PAGE_KERNEL);
+ 
+       /*
+@@ -1315,7 +1315,7 @@ void enable_sep_cpu(void)
+ 
+       tss->x86_tss.ss1 = __KERNEL_CS;
+       wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
+-      wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
++      wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
+       wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
+ 
+       put_cpu();
+@@ -1441,7 +1441,7 @@ void syscall_init(void)
+        * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
+        */
+       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+-      wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
++      wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
+       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
+ #else
+       wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
+@@ -1655,7 +1655,7 @@ void cpu_init(void)
+        */
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+       load_TR_desc();
+-      load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
++      load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
+ 
+       load_mm_ldt(&init_mm);
+ 
+diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
+index b005e5ef6738..55bf1c3b5319 100644
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -43,9 +43,9 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
+       return true;
+ }
+ 
+-bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
++bool in_entry_stack(unsigned long *stack, struct stack_info *info)
+ {
+-      struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());
++      struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
+ 
+       void *begin = ss;
+       void *end = ss + 1;
+@@ -53,7 +53,7 @@ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
+       if ((void *)stack < begin || (void *)stack >= end)
+               return false;
+ 
+-      info->type      = STACK_TYPE_SYSENTER;
++      info->type      = STACK_TYPE_ENTRY;
+       info->begin     = begin;
+       info->end       = end;
+       info->next_sp   = NULL;
+@@ -111,13 +111,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+        * - task stack
+        * - interrupt stack
+        * - HW exception stacks (double fault, nmi, debug, mce)
+-       * - SYSENTER stack
++       * - entry stack
+        *
+        * x86-32 can have up to four stacks:
+        * - task stack
+        * - softirq stack
+        * - hardirq stack
+-       * - SYSENTER stack
++       * - entry stack
+        */
+       for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+               const char *stack_name;
+diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
+index 3160bf2d100e..4580ba0204f6 100644
+--- a/arch/x86/kernel/dumpstack_32.c
++++ b/arch/x86/kernel/dumpstack_32.c
+@@ -25,8 +25,8 @@ const char *stack_type_name(enum stack_type type)
+       if (type == STACK_TYPE_SOFTIRQ)
+               return "SOFTIRQ";
+ 
+-      if (type == STACK_TYPE_SYSENTER)
+-              return "SYSENTER";
++      if (type == STACK_TYPE_ENTRY)
++              return "ENTRY_TRAMPOLINE";
+ 
+       return NULL;
+ }
+@@ -95,7 +95,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
+       if (task != current)
+               goto unknown;
+ 
+-      if (in_sysenter_stack(stack, info))
++      if (in_entry_stack(stack, info))
+               goto recursion_check;
+ 
+       if (in_hardirq_stack(stack, info))
+diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
+index f5107b659f86..7d9c0e06afc2 100644
+--- a/arch/x86/kernel/dumpstack_64.c
++++ b/arch/x86/kernel/dumpstack_64.c
+@@ -36,8 +36,14 @@ const char *stack_type_name(enum stack_type type)
+       if (type == STACK_TYPE_IRQ)
+               return "IRQ";
+ 
+-      if (type == STACK_TYPE_SYSENTER)
+-              return "SYSENTER";
++      if (type == STACK_TYPE_ENTRY) {
++              /*
++               * On 64-bit, we have a generic entry stack that we
++               * use for all the kernel entry points, including
++               * SYSENTER.
++               */
++              return "ENTRY_TRAMPOLINE";
++      }
+ 
+       if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
+               return exception_stack_names[type - STACK_TYPE_EXCEPTION];
+@@ -117,7 +123,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
+       if (in_irq_stack(stack, info))
+               goto recursion_check;
+ 
+-      if (in_sysenter_stack(stack, info))
++      if (in_entry_stack(stack, info))
+               goto recursion_check;
+ 
+       goto unknown;
+diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
+index 3ef7800007f8..634c6a78885c 100644
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -949,9 +949,9 @@ ENTRY(debug)
+ 
+       /* Are we currently on the SYSENTER stack? */
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+-      addl    $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+-      subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+-      cmpl    $SIZEOF_SYSENTER_stack, %ecx
++      addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
++      subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
++      cmpl    $SIZEOF_entry_stack, %ecx
+       jb      .Ldebug_from_sysenter_stack
+ 
+       TRACE_IRQS_OFF
+@@ -993,9 +993,9 @@ ENTRY(nmi)
+ 
+       /* Are we currently on the SYSENTER stack? */
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+-      addl    $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
+-      subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
+-      cmpl    $SIZEOF_SYSENTER_stack, %ecx
++      addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
++      subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
++      cmpl    $SIZEOF_entry_stack, %ecx
+       jb      .Lnmi_from_sysenter_stack
+ 
+       /* Not on SYSENTER stack. */
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 157860b3569f..03e052f02176 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -153,8 +153,8 @@ END(native_usergs_sysret64)
+       _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+ 
+ /* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+-#define RSP_SCRATCH   CPU_ENTRY_AREA_SYSENTER_stack + \
+-                      SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
++#define RSP_SCRATCH   CPU_ENTRY_AREA_entry_stack + \
++                      SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
+ 
+ ENTRY(entry_SYSCALL_64_trampoline)
+       UNWIND_HINT_EMPTY
+-- 
+2.14.2
+
diff --git a/patches/kernel/0176-x86-uv-Use-the-right-TLB-flush-API.patch b/patches/kernel/0176-x86-uv-Use-the-right-TLB-flush-API.patch

deleted file mode 100644 (file)

index cffa705..0000000
--- a/patches/kernel/0176-x86-uv-Use-the-right-TLB-flush-API.patch
+++ /dev/null
@@ -1,64 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Tue, 5 Dec 2017 13:34:50 +0100
-Subject: [PATCH] x86/uv: Use the right TLB-flush API
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Since uv_flush_tlb_others() implements flush_tlb_others() which is
-about flushing user mappings, we should use __flush_tlb_single(),
-which too is about flushing user mappings.
-
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Acked-by: Andrew Banman <abanman@hpe.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Mike Travis <mike.travis@hpe.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 3e46e0f5ee3643a1239be9046c7ba6c66ca2b329)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 532216cdf02174dc08ca998b570c4699899fa355)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/platform/uv/tlb_uv.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
-index f44c0bc95aa2..8538a6723171 100644
---- a/arch/x86/platform/uv/tlb_uv.c
-+++ b/arch/x86/platform/uv/tlb_uv.c
-@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
-               local_flush_tlb();
-               stat->d_alltlb++;
-       } else {
--              __flush_tlb_one(msg->address);
-+              __flush_tlb_single(msg->address);
-               stat->d_onetlb++;
-       }
-       stat->d_requestee++;
--- 
-2.14.2
-
diff --git a/patches/kernel/0177-x86-microcode-Dont-abuse-the-TLB-flush-interface.patch b/patches/kernel/0177-x86-microcode-Dont-abuse-the-TLB-flush-interface.patch

deleted file mode 100644 (file)

index 7873a60..0000000
--- a/patches/kernel/0177-x86-microcode-Dont-abuse-the-TLB-flush-interface.patch
+++ /dev/null
@@ -1,126 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Tue, 5 Dec 2017 13:34:51 +0100
-Subject: [PATCH] x86/microcode: Dont abuse the TLB-flush interface
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Commit:
-
-  ec400ddeff20 ("x86/microcode_intel_early.c: Early update ucode on Intel's CPU")
-
-... grubbed into tlbflush internals without coherent explanation.
-
-Since it says its a precaution and the SDM doesn't mention anything like
-this, take it out back.
-
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: fenghua.yu@intel.com
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 23cb7d46f371844c004784ad9552a57446f73e5a)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 0f3d96d1e5aa4d9538ab1a918fb49f2c57ebb6f5)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/tlbflush.h       | 19 ++++++-------------
- arch/x86/kernel/cpu/microcode/intel.c | 13 -------------
- 2 files changed, 6 insertions(+), 26 deletions(-)
-
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index 6533da3036c9..6d2688a6fda0 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -234,20 +234,9 @@ static inline void __native_flush_tlb(void)
-       preempt_enable();
- }
- 
--static inline void __native_flush_tlb_global_irq_disabled(void)
--{
--      unsigned long cr4;
--
--      cr4 = this_cpu_read(cpu_tlbstate.cr4);
--      /* clear PGE */
--      native_write_cr4(cr4 & ~X86_CR4_PGE);
--      /* write old PGE again and flush TLBs */
--      native_write_cr4(cr4);
--}
--
- static inline void __native_flush_tlb_global(void)
- {
--      unsigned long flags;
-+      unsigned long cr4, flags;
- 
-       if (static_cpu_has(X86_FEATURE_INVPCID)) {
-               /*
-@@ -265,7 +254,11 @@ static inline void __native_flush_tlb_global(void)
-        */
-       raw_local_irq_save(flags);
- 
--      __native_flush_tlb_global_irq_disabled();
-+      cr4 = this_cpu_read(cpu_tlbstate.cr4);
-+      /* toggle PGE */
-+      native_write_cr4(cr4 ^ X86_CR4_PGE);
-+      /* write old PGE again and flush TLBs */
-+      native_write_cr4(cr4);
- 
-       raw_local_irq_restore(flags);
- }
-diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
-index 636a5fcfdeb7..d9a8f69101aa 100644
---- a/arch/x86/kernel/cpu/microcode/intel.c
-+++ b/arch/x86/kernel/cpu/microcode/intel.c
-@@ -564,15 +564,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
- }
- #else
- 
--/*
-- * Flush global tlb. We only do this in x86_64 where paging has been enabled
-- * already and PGE should be enabled as well.
-- */
--static inline void flush_tlb_early(void)
--{
--      __native_flush_tlb_global_irq_disabled();
--}
--
- static inline void print_ucode(struct ucode_cpu_info *uci)
- {
-       struct microcode_intel *mc;
-@@ -601,10 +592,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
-       if (rev != mc->hdr.rev)
-               return -1;
- 
--#ifdef CONFIG_X86_64
--      /* Flush global tlb. This is precaution. */
--      flush_tlb_early();
--#endif
-       uci->cpu_sig.rev = rev;
- 
-       if (early)
--- 
-2.14.2
-
diff --git a/patches/kernel/0177-x86-uv-Use-the-right-TLB-flush-API.patch b/patches/kernel/0177-x86-uv-Use-the-right-TLB-flush-API.patch

new file mode 100644 (file)

index 0000000..cffa705
--- /dev/null
+++ b/patches/kernel/0177-x86-uv-Use-the-right-TLB-flush-API.patch
@@ -0,0 +1,64 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:50 +0100
+Subject: [PATCH] x86/uv: Use the right TLB-flush API
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Since uv_flush_tlb_others() implements flush_tlb_others() which is
+about flushing user mappings, we should use __flush_tlb_single(),
+which too is about flushing user mappings.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Andrew Banman <abanman@hpe.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Travis <mike.travis@hpe.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 3e46e0f5ee3643a1239be9046c7ba6c66ca2b329)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 532216cdf02174dc08ca998b570c4699899fa355)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/platform/uv/tlb_uv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
+index f44c0bc95aa2..8538a6723171 100644
+--- a/arch/x86/platform/uv/tlb_uv.c
++++ b/arch/x86/platform/uv/tlb_uv.c
+@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
+               local_flush_tlb();
+               stat->d_alltlb++;
+       } else {
+-              __flush_tlb_one(msg->address);
++              __flush_tlb_single(msg->address);
+               stat->d_onetlb++;
+       }
+       stat->d_requestee++;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0178-x86-microcode-Dont-abuse-the-TLB-flush-interface.patch b/patches/kernel/0178-x86-microcode-Dont-abuse-the-TLB-flush-interface.patch

new file mode 100644 (file)

index 0000000..7873a60
--- /dev/null
+++ b/patches/kernel/0178-x86-microcode-Dont-abuse-the-TLB-flush-interface.patch
@@ -0,0 +1,126 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:51 +0100
+Subject: [PATCH] x86/microcode: Dont abuse the TLB-flush interface
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Commit:
+
+  ec400ddeff20 ("x86/microcode_intel_early.c: Early update ucode on Intel's CPU")
+
+... grubbed into tlbflush internals without coherent explanation.
+
+Since it says it's a precaution and the SDM doesn't mention anything like
+this, take it out back.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: fenghua.yu@intel.com
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 23cb7d46f371844c004784ad9552a57446f73e5a)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 0f3d96d1e5aa4d9538ab1a918fb49f2c57ebb6f5)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/tlbflush.h       | 19 ++++++-------------
+ arch/x86/kernel/cpu/microcode/intel.c | 13 -------------
+ 2 files changed, 6 insertions(+), 26 deletions(-)
+
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 6533da3036c9..6d2688a6fda0 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -234,20 +234,9 @@ static inline void __native_flush_tlb(void)
+       preempt_enable();
+ }
+ 
+-static inline void __native_flush_tlb_global_irq_disabled(void)
+-{
+-      unsigned long cr4;
+-
+-      cr4 = this_cpu_read(cpu_tlbstate.cr4);
+-      /* clear PGE */
+-      native_write_cr4(cr4 & ~X86_CR4_PGE);
+-      /* write old PGE again and flush TLBs */
+-      native_write_cr4(cr4);
+-}
+-
+ static inline void __native_flush_tlb_global(void)
+ {
+-      unsigned long flags;
++      unsigned long cr4, flags;
+ 
+       if (static_cpu_has(X86_FEATURE_INVPCID)) {
+               /*
+@@ -265,7 +254,11 @@ static inline void __native_flush_tlb_global(void)
+        */
+       raw_local_irq_save(flags);
+ 
+-      __native_flush_tlb_global_irq_disabled();
++      cr4 = this_cpu_read(cpu_tlbstate.cr4);
++      /* toggle PGE */
++      native_write_cr4(cr4 ^ X86_CR4_PGE);
++      /* write old PGE again and flush TLBs */
++      native_write_cr4(cr4);
+ 
+       raw_local_irq_restore(flags);
+ }
+diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
+index 636a5fcfdeb7..d9a8f69101aa 100644
+--- a/arch/x86/kernel/cpu/microcode/intel.c
++++ b/arch/x86/kernel/cpu/microcode/intel.c
+@@ -564,15 +564,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
+ }
+ #else
+ 
+-/*
+- * Flush global tlb. We only do this in x86_64 where paging has been enabled
+- * already and PGE should be enabled as well.
+- */
+-static inline void flush_tlb_early(void)
+-{
+-      __native_flush_tlb_global_irq_disabled();
+-}
+-
+ static inline void print_ucode(struct ucode_cpu_info *uci)
+ {
+       struct microcode_intel *mc;
+@@ -601,10 +592,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
+       if (rev != mc->hdr.rev)
+               return -1;
+ 
+-#ifdef CONFIG_X86_64
+-      /* Flush global tlb. This is precaution. */
+-      flush_tlb_early();
+-#endif
+       uci->cpu_sig.rev = rev;
+ 
+       if (early)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0178-x86-mm-Use-__flush_tlb_one-for-kernel-memory.patch b/patches/kernel/0178-x86-mm-Use-__flush_tlb_one-for-kernel-memory.patch

deleted file mode 100644 (file)

index 0474955..0000000
--- a/patches/kernel/0178-x86-mm-Use-__flush_tlb_one-for-kernel-memory.patch
+++ /dev/null
@@ -1,61 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Tue, 5 Dec 2017 13:34:49 +0100
-Subject: [PATCH] x86/mm: Use __flush_tlb_one() for kernel memory
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-__flush_tlb_single() is for user mappings, __flush_tlb_one() for
-kernel mappings.
-
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit a501686b2923ce6f2ff2b1d0d50682c6411baf72)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 9d23f46143933cd29576b6aa2b1827f3f39b9cf8)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/tlb.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index ed06f1593390..5b4342c5039c 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -546,7 +546,7 @@ static void do_kernel_range_flush(void *info)
- 
-       /* flush range by one by one 'invlpg' */
-       for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
--              __flush_tlb_single(addr);
-+              __flush_tlb_one(addr);
- }
- 
- void flush_tlb_kernel_range(unsigned long start, unsigned long end)
--- 
-2.14.2
-
diff --git a/patches/kernel/0179-x86-mm-Remove-superfluous-barriers.patch b/patches/kernel/0179-x86-mm-Remove-superfluous-barriers.patch

deleted file mode 100644 (file)

index d3017fc..0000000
--- a/patches/kernel/0179-x86-mm-Remove-superfluous-barriers.patch
+++ /dev/null
@@ -1,72 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Tue, 5 Dec 2017 13:34:46 +0100
-Subject: [PATCH] x86/mm: Remove superfluous barriers
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-atomic64_inc_return() already implies smp_mb() before and after.
-
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit b5fc6d943808b570bdfbec80f40c6b3855f1c48b)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 31a37930df33315a7006b46706f6babdb57db1f4)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/tlbflush.h | 8 +-------
- 1 file changed, 1 insertion(+), 7 deletions(-)
-
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index 6d2688a6fda0..bc1460b4737b 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -59,19 +59,13 @@ static inline void invpcid_flush_all_nonglobals(void)
- 
- static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
- {
--      u64 new_tlb_gen;
--
-       /*
-        * Bump the generation count.  This also serves as a full barrier
-        * that synchronizes with switch_mm(): callers are required to order
-        * their read of mm_cpumask after their writes to the paging
-        * structures.
-        */
--      smp_mb__before_atomic();
--      new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
--      smp_mb__after_atomic();
--
--      return new_tlb_gen;
-+      return atomic64_inc_return(&mm->context.tlb_gen);
- }
- 
- #ifdef CONFIG_PARAVIRT
--- 
-2.14.2
-
diff --git a/patches/kernel/0179-x86-mm-Use-__flush_tlb_one-for-kernel-memory.patch b/patches/kernel/0179-x86-mm-Use-__flush_tlb_one-for-kernel-memory.patch

new file mode 100644 (file)

index 0000000..0474955
--- /dev/null
+++ b/patches/kernel/0179-x86-mm-Use-__flush_tlb_one-for-kernel-memory.patch
@@ -0,0 +1,61 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:49 +0100
+Subject: [PATCH] x86/mm: Use __flush_tlb_one() for kernel memory
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+__flush_tlb_single() is for user mappings, __flush_tlb_one() for
+kernel mappings.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit a501686b2923ce6f2ff2b1d0d50682c6411baf72)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 9d23f46143933cd29576b6aa2b1827f3f39b9cf8)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/tlb.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index ed06f1593390..5b4342c5039c 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -546,7 +546,7 @@ static void do_kernel_range_flush(void *info)
+ 
+       /* flush range by one by one 'invlpg' */
+       for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
+-              __flush_tlb_single(addr);
++              __flush_tlb_one(addr);
+ }
+ 
+ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0180-x86-mm-Add-comments-to-clarify-which-TLB-flush-funct.patch b/patches/kernel/0180-x86-mm-Add-comments-to-clarify-which-TLB-flush-funct.patch

deleted file mode 100644 (file)

index d750f16..0000000
--- a/patches/kernel/0180-x86-mm-Add-comments-to-clarify-which-TLB-flush-funct.patch
+++ /dev/null
@@ -1,113 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Tue, 5 Dec 2017 13:34:52 +0100
-Subject: [PATCH] x86/mm: Add comments to clarify which TLB-flush functions are
- supposed to flush what
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Per popular request..
-
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit 3f67af51e56f291d7417d77c4f67cd774633c5e1)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 8394b666c2b3b1fc5279a897c96b196531923f3b)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/tlbflush.h | 24 ++++++++++++++++++++++--
- 1 file changed, 22 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index bc1460b4737b..ed5d483c4a1b 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -216,6 +216,10 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
-       cr4_set_bits(mask);
- }
- 
-+
-+/*
-+ * flush the entire current user mapping
-+ */
- static inline void __native_flush_tlb(void)
- {
-       /*
-@@ -228,6 +232,9 @@ static inline void __native_flush_tlb(void)
-       preempt_enable();
- }
- 
-+/*
-+ * flush everything
-+ */
- static inline void __native_flush_tlb_global(void)
- {
-       unsigned long cr4, flags;
-@@ -257,17 +264,27 @@ static inline void __native_flush_tlb_global(void)
-       raw_local_irq_restore(flags);
- }
- 
-+/*
-+ * flush one page in the user mapping
-+ */
- static inline void __native_flush_tlb_single(unsigned long addr)
- {
-       asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
- }
- 
-+/*
-+ * flush everything
-+ */
- static inline void __flush_tlb_all(void)
- {
--      if (boot_cpu_has(X86_FEATURE_PGE))
-+      if (boot_cpu_has(X86_FEATURE_PGE)) {
-               __flush_tlb_global();
--      else
-+      } else {
-+              /*
-+               * !PGE -> !PCID (setup_pcid()), thus every flush is total.
-+               */
-               __flush_tlb();
-+      }
- 
-       /*
-        * Note: if we somehow had PCID but not PGE, then this wouldn't work --
-@@ -278,6 +295,9 @@ static inline void __flush_tlb_all(void)
-        */
- }
- 
-+/*
-+ * flush one page in the kernel mapping
-+ */
- static inline void __flush_tlb_one(unsigned long addr)
- {
-       count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
--- 
-2.14.2
-
diff --git a/patches/kernel/0180-x86-mm-Remove-superfluous-barriers.patch b/patches/kernel/0180-x86-mm-Remove-superfluous-barriers.patch

new file mode 100644 (file)

index 0000000..d3017fc
--- /dev/null
+++ b/patches/kernel/0180-x86-mm-Remove-superfluous-barriers.patch
@@ -0,0 +1,72 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:46 +0100
+Subject: [PATCH] x86/mm: Remove superfluous barriers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+atomic64_inc_return() already implies smp_mb() before and after.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit b5fc6d943808b570bdfbec80f40c6b3855f1c48b)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 31a37930df33315a7006b46706f6babdb57db1f4)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/tlbflush.h | 8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 6d2688a6fda0..bc1460b4737b 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -59,19 +59,13 @@ static inline void invpcid_flush_all_nonglobals(void)
+ 
+ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+ {
+-      u64 new_tlb_gen;
+-
+       /*
+        * Bump the generation count.  This also serves as a full barrier
+        * that synchronizes with switch_mm(): callers are required to order
+        * their read of mm_cpumask after their writes to the paging
+        * structures.
+        */
+-      smp_mb__before_atomic();
+-      new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
+-      smp_mb__after_atomic();
+-
+-      return new_tlb_gen;
++      return atomic64_inc_return(&mm->context.tlb_gen);
+ }
+ 
+ #ifdef CONFIG_PARAVIRT
+-- 
+2.14.2
+
diff --git a/patches/kernel/0181-x86-mm-Add-comments-to-clarify-which-TLB-flush-funct.patch b/patches/kernel/0181-x86-mm-Add-comments-to-clarify-which-TLB-flush-funct.patch

new file mode 100644 (file)

index 0000000..d750f16
--- /dev/null
+++ b/patches/kernel/0181-x86-mm-Add-comments-to-clarify-which-TLB-flush-funct.patch
@@ -0,0 +1,113 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:52 +0100
+Subject: [PATCH] x86/mm: Add comments to clarify which TLB-flush functions are
+ supposed to flush what
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Per popular request..
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit 3f67af51e56f291d7417d77c4f67cd774633c5e1)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 8394b666c2b3b1fc5279a897c96b196531923f3b)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/tlbflush.h | 24 ++++++++++++++++++++++--
+ 1 file changed, 22 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index bc1460b4737b..ed5d483c4a1b 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -216,6 +216,10 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
+       cr4_set_bits(mask);
+ }
+ 
++
++/*
++ * flush the entire current user mapping
++ */
+ static inline void __native_flush_tlb(void)
+ {
+       /*
+@@ -228,6 +232,9 @@ static inline void __native_flush_tlb(void)
+       preempt_enable();
+ }
+ 
++/*
++ * flush everything
++ */
+ static inline void __native_flush_tlb_global(void)
+ {
+       unsigned long cr4, flags;
+@@ -257,17 +264,27 @@ static inline void __native_flush_tlb_global(void)
+       raw_local_irq_restore(flags);
+ }
+ 
++/*
++ * flush one page in the user mapping
++ */
+ static inline void __native_flush_tlb_single(unsigned long addr)
+ {
+       asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ }
+ 
++/*
++ * flush everything
++ */
+ static inline void __flush_tlb_all(void)
+ {
+-      if (boot_cpu_has(X86_FEATURE_PGE))
++      if (boot_cpu_has(X86_FEATURE_PGE)) {
+               __flush_tlb_global();
+-      else
++      } else {
++              /*
++               * !PGE -> !PCID (setup_pcid()), thus every flush is total.
++               */
+               __flush_tlb();
++      }
+ 
+       /*
+        * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+@@ -278,6 +295,9 @@ static inline void __flush_tlb_all(void)
+        */
+ }
+ 
++/*
++ * flush one page in the kernel mapping
++ */
+ static inline void __flush_tlb_one(unsigned long addr)
+ {
+       count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0181-x86-mm-Move-the-CR3-construction-functions-to-tlbflu.patch b/patches/kernel/0181-x86-mm-Move-the-CR3-construction-functions-to-tlbflu.patch

deleted file mode 100644 (file)

index 8927557..0000000
--- a/patches/kernel/0181-x86-mm-Move-the-CR3-construction-functions-to-tlbflu.patch
+++ /dev/null
@@ -1,179 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Mon, 4 Dec 2017 15:07:54 +0100
-Subject: [PATCH] x86/mm: Move the CR3 construction functions to tlbflush.h
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-For flushing the TLB, the ASID which has been programmed into the hardware
-must be known.  That differs from what is in 'cpu_tlbstate'.
-
-Add functions to transform the 'cpu_tlbstate' values into to the one
-programmed into the hardware (CR3).
-
-It's not easy to include mmu_context.h into tlbflush.h, so just move the
-CR3 building over to tlbflush.h.
-
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 50fb83a62cf472dc53ba23bd3f7bd6c1b2b3b53e)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit f741923acf51c1061c11b45a168f8864d37dc5cd)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/mmu_context.h | 29 +----------------------------
- arch/x86/include/asm/tlbflush.h    | 26 ++++++++++++++++++++++++++
- arch/x86/mm/tlb.c                  |  8 ++++----
- 3 files changed, 31 insertions(+), 32 deletions(-)
-
-diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
-index 47ec51a821e8..89a01ad7e370 100644
---- a/arch/x86/include/asm/mmu_context.h
-+++ b/arch/x86/include/asm/mmu_context.h
-@@ -289,33 +289,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
-       return __pkru_allows_pkey(vma_pkey(vma), write);
- }
- 
--/*
-- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
-- * bits.  This serves two purposes.  It prevents a nasty situation in
-- * which PCID-unaware code saves CR3, loads some other value (with PCID
-- * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
-- * the saved ASID was nonzero.  It also means that any bugs involving
-- * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
-- * deterministically.
-- */
--
--static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
--{
--      if (static_cpu_has(X86_FEATURE_PCID)) {
--              VM_WARN_ON_ONCE(asid > 4094);
--              return __sme_pa(mm->pgd) | (asid + 1);
--      } else {
--              VM_WARN_ON_ONCE(asid != 0);
--              return __sme_pa(mm->pgd);
--      }
--}
--
--static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
--{
--      VM_WARN_ON_ONCE(asid > 4094);
--      return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
--}
--
- /*
-  * This can be used from process context to figure out what the value of
-  * CR3 is without needing to do a (slow) __read_cr3().
-@@ -325,7 +298,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
-  */
- static inline unsigned long __get_current_cr3_fast(void)
- {
--      unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
-+      unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
-               this_cpu_read(cpu_tlbstate.loaded_mm_asid));
- 
-       /* For now, be very restrictive about when this can be called. */
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index ed5d483c4a1b..3a421b164868 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -68,6 +68,32 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
-       return atomic64_inc_return(&mm->context.tlb_gen);
- }
- 
-+/*
-+ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits.
-+ * This serves two purposes.  It prevents a nasty situation in which
-+ * PCID-unaware code saves CR3, loads some other value (with PCID == 0),
-+ * and then restores CR3, thus corrupting the TLB for ASID 0 if the saved
-+ * ASID was nonzero.  It also means that any bugs involving loading a
-+ * PCID-enabled CR3 with CR4.PCIDE off will trigger deterministically.
-+ */
-+struct pgd_t;
-+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
-+{
-+      if (static_cpu_has(X86_FEATURE_PCID)) {
-+              VM_WARN_ON_ONCE(asid > 4094);
-+              return __sme_pa(pgd) | (asid + 1);
-+      } else {
-+              VM_WARN_ON_ONCE(asid != 0);
-+              return __sme_pa(pgd);
-+      }
-+}
-+
-+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
-+{
-+      VM_WARN_ON_ONCE(asid > 4094);
-+      return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH;
-+}
-+
- #ifdef CONFIG_PARAVIRT
- #include <asm/paravirt.h>
- #else
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index 5b4342c5039c..87d4f961bcb4 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -126,7 +126,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-        * does something like write_cr3(read_cr3_pa()).
-        */
- #ifdef CONFIG_DEBUG_VM
--      if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
-+      if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
-               /*
-                * If we were to BUG here, we'd be very likely to kill
-                * the system so hard that we don't see the call trace.
-@@ -193,7 +193,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-               if (need_flush) {
-                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
-                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
--                      write_cr3(build_cr3(next, new_asid));
-+                      write_cr3(build_cr3(next->pgd, new_asid));
- 
-                       /*
-                        * NB: This gets called via leave_mm() in the idle path
-@@ -206,7 +206,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-                       trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-               } else {
-                       /* The new ASID is already up to date. */
--                      write_cr3(build_cr3_noflush(next, new_asid));
-+                      write_cr3(build_cr3_noflush(next->pgd, new_asid));
- 
-                       /* See above wrt _rcuidle. */
-                       trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
-@@ -283,7 +283,7 @@ void initialize_tlbstate_and_flush(void)
-               !(cr4_read_shadow() & X86_CR4_PCIDE));
- 
-       /* Force ASID 0 and force a TLB flush. */
--      write_cr3(build_cr3(mm, 0));
-+      write_cr3(build_cr3(mm->pgd, 0));
- 
-       /* Reinitialize tlbstate. */
-       this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
--- 
-2.14.2
-
diff --git a/patches/kernel/0182-x86-mm-Move-the-CR3-construction-functions-to-tlbflu.patch b/patches/kernel/0182-x86-mm-Move-the-CR3-construction-functions-to-tlbflu.patch

new file mode 100644 (file)

index 0000000..8927557
--- /dev/null
+++ b/patches/kernel/0182-x86-mm-Move-the-CR3-construction-functions-to-tlbflu.patch
@@ -0,0 +1,179 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:54 +0100
+Subject: [PATCH] x86/mm: Move the CR3 construction functions to tlbflush.h
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+For flushing the TLB, the ASID which has been programmed into the hardware
+must be known.  That differs from what is in 'cpu_tlbstate'.
+
+Add functions to transform the 'cpu_tlbstate' values into to the one
+programmed into the hardware (CR3).
+
+It's not easy to include mmu_context.h into tlbflush.h, so just move the
+CR3 building over to tlbflush.h.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 50fb83a62cf472dc53ba23bd3f7bd6c1b2b3b53e)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit f741923acf51c1061c11b45a168f8864d37dc5cd)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/mmu_context.h | 29 +----------------------------
+ arch/x86/include/asm/tlbflush.h    | 26 ++++++++++++++++++++++++++
+ arch/x86/mm/tlb.c                  |  8 ++++----
+ 3 files changed, 31 insertions(+), 32 deletions(-)
+
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index 47ec51a821e8..89a01ad7e370 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -289,33 +289,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
+       return __pkru_allows_pkey(vma_pkey(vma), write);
+ }
+ 
+-/*
+- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
+- * bits.  This serves two purposes.  It prevents a nasty situation in
+- * which PCID-unaware code saves CR3, loads some other value (with PCID
+- * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
+- * the saved ASID was nonzero.  It also means that any bugs involving
+- * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
+- * deterministically.
+- */
+-
+-static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+-{
+-      if (static_cpu_has(X86_FEATURE_PCID)) {
+-              VM_WARN_ON_ONCE(asid > 4094);
+-              return __sme_pa(mm->pgd) | (asid + 1);
+-      } else {
+-              VM_WARN_ON_ONCE(asid != 0);
+-              return __sme_pa(mm->pgd);
+-      }
+-}
+-
+-static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+-{
+-      VM_WARN_ON_ONCE(asid > 4094);
+-      return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
+-}
+-
+ /*
+  * This can be used from process context to figure out what the value of
+  * CR3 is without needing to do a (slow) __read_cr3().
+@@ -325,7 +298,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+  */
+ static inline unsigned long __get_current_cr3_fast(void)
+ {
+-      unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
++      unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
+               this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+ 
+       /* For now, be very restrictive about when this can be called. */
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index ed5d483c4a1b..3a421b164868 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -68,6 +68,32 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+       return atomic64_inc_return(&mm->context.tlb_gen);
+ }
+ 
++/*
++ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits.
++ * This serves two purposes.  It prevents a nasty situation in which
++ * PCID-unaware code saves CR3, loads some other value (with PCID == 0),
++ * and then restores CR3, thus corrupting the TLB for ASID 0 if the saved
++ * ASID was nonzero.  It also means that any bugs involving loading a
++ * PCID-enabled CR3 with CR4.PCIDE off will trigger deterministically.
++ */
++struct pgd_t;
++static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
++{
++      if (static_cpu_has(X86_FEATURE_PCID)) {
++              VM_WARN_ON_ONCE(asid > 4094);
++              return __sme_pa(pgd) | (asid + 1);
++      } else {
++              VM_WARN_ON_ONCE(asid != 0);
++              return __sme_pa(pgd);
++      }
++}
++
++static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
++{
++      VM_WARN_ON_ONCE(asid > 4094);
++      return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH;
++}
++
+ #ifdef CONFIG_PARAVIRT
+ #include <asm/paravirt.h>
+ #else
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 5b4342c5039c..87d4f961bcb4 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -126,7 +126,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+        * does something like write_cr3(read_cr3_pa()).
+        */
+ #ifdef CONFIG_DEBUG_VM
+-      if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
++      if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
+               /*
+                * If we were to BUG here, we'd be very likely to kill
+                * the system so hard that we don't see the call trace.
+@@ -193,7 +193,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+               if (need_flush) {
+                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
+                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
+-                      write_cr3(build_cr3(next, new_asid));
++                      write_cr3(build_cr3(next->pgd, new_asid));
+ 
+                       /*
+                        * NB: This gets called via leave_mm() in the idle path
+@@ -206,7 +206,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+                       trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+               } else {
+                       /* The new ASID is already up to date. */
+-                      write_cr3(build_cr3_noflush(next, new_asid));
++                      write_cr3(build_cr3_noflush(next->pgd, new_asid));
+ 
+                       /* See above wrt _rcuidle. */
+                       trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
+@@ -283,7 +283,7 @@ void initialize_tlbstate_and_flush(void)
+               !(cr4_read_shadow() & X86_CR4_PCIDE));
+ 
+       /* Force ASID 0 and force a TLB flush. */
+-      write_cr3(build_cr3(mm, 0));
++      write_cr3(build_cr3(mm->pgd, 0));
+ 
+       /* Reinitialize tlbstate. */
+       this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0182-x86-mm-Remove-hard-coded-ASID-limit-checks.patch b/patches/kernel/0182-x86-mm-Remove-hard-coded-ASID-limit-checks.patch

deleted file mode 100644 (file)

index e6c5079..0000000
--- a/patches/kernel/0182-x86-mm-Remove-hard-coded-ASID-limit-checks.patch
+++ /dev/null
@@ -1,96 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Mon, 4 Dec 2017 15:07:55 +0100
-Subject: [PATCH] x86/mm: Remove hard-coded ASID limit checks
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-First, it's nice to remove the magic numbers.
-
-Second, PAGE_TABLE_ISOLATION is going to consume half of the available ASID
-space.  The space is currently unused, but add a comment to spell out this
-new restriction.
-
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit cb0a9144a744e55207e24dcef812f05cd15a499a)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit fd5d001ae73ccd382d4270f53e27dcf61c4e4749)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/tlbflush.h | 20 ++++++++++++++++++--
- 1 file changed, 18 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index 3a421b164868..c1c10db4156c 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -68,6 +68,22 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
-       return atomic64_inc_return(&mm->context.tlb_gen);
- }
- 
-+/* There are 12 bits of space for ASIDS in CR3 */
-+#define CR3_HW_ASID_BITS              12
-+/*
-+ * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
-+ * user/kernel switches
-+ */
-+#define PTI_CONSUMED_ASID_BITS                0
-+
-+#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS)
-+/*
-+ * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid.  -1 below to account
-+ * for them being zero-based.  Another -1 is because ASID 0 is reserved for
-+ * use by non-PCID-aware users.
-+ */
-+#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
-+
- /*
-  * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits.
-  * This serves two purposes.  It prevents a nasty situation in which
-@@ -80,7 +96,7 @@ struct pgd_t;
- static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
- {
-       if (static_cpu_has(X86_FEATURE_PCID)) {
--              VM_WARN_ON_ONCE(asid > 4094);
-+              VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
-               return __sme_pa(pgd) | (asid + 1);
-       } else {
-               VM_WARN_ON_ONCE(asid != 0);
-@@ -90,7 +106,7 @@ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
- 
- static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
- {
--      VM_WARN_ON_ONCE(asid > 4094);
-+      VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
-       return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH;
- }
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0183-x86-mm-Put-MMU-to-hardware-ASID-translation-in-one-p.patch b/patches/kernel/0183-x86-mm-Put-MMU-to-hardware-ASID-translation-in-one-p.patch

deleted file mode 100644 (file)

index 51ab520..0000000
--- a/patches/kernel/0183-x86-mm-Put-MMU-to-hardware-ASID-translation-in-one-p.patch
+++ /dev/null
@@ -1,109 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Mon, 4 Dec 2017 15:07:56 +0100
-Subject: [PATCH] x86/mm: Put MMU to hardware ASID translation in one place
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-There are effectively two ASID types:
-
- 1. The one stored in the mmu_context that goes from 0..5
- 2. The one programmed into the hardware that goes from 1..6
-
-This consolidates the locations where converting between the two (by doing
-a +1) to a single place which gives us a nice place to comment.
-PAGE_TABLE_ISOLATION will also need to, given an ASID, know which hardware
-ASID to flush for the userspace mapping.
-
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit dd95f1a4b5ca904c78e6a097091eb21436478abb)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 6f3e88a8f41123ac339d28cfdda5da0e85bec550)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/tlbflush.h | 31 +++++++++++++++++++------------
- 1 file changed, 19 insertions(+), 12 deletions(-)
-
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index c1c10db4156c..ecd634f87e4e 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -84,30 +84,37 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
-  */
- #define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
- 
--/*
-- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits.
-- * This serves two purposes.  It prevents a nasty situation in which
-- * PCID-unaware code saves CR3, loads some other value (with PCID == 0),
-- * and then restores CR3, thus corrupting the TLB for ASID 0 if the saved
-- * ASID was nonzero.  It also means that any bugs involving loading a
-- * PCID-enabled CR3 with CR4.PCIDE off will trigger deterministically.
-- */
-+static inline u16 kern_pcid(u16 asid)
-+{
-+      VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
-+      /*
-+       * If PCID is on, ASID-aware code paths put the ASID+1 into the
-+       * PCID bits.  This serves two purposes.  It prevents a nasty
-+       * situation in which PCID-unaware code saves CR3, loads some other
-+       * value (with PCID == 0), and then restores CR3, thus corrupting
-+       * the TLB for ASID 0 if the saved ASID was nonzero.  It also means
-+       * that any bugs involving loading a PCID-enabled CR3 with
-+       * CR4.PCIDE off will trigger deterministically.
-+       */
-+      return asid + 1;
-+}
-+
- struct pgd_t;
- static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
- {
-       if (static_cpu_has(X86_FEATURE_PCID)) {
--              VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
--              return __sme_pa(pgd) | (asid + 1);
-+              return __pa(pgd) | kern_pcid(asid);
-       } else {
-               VM_WARN_ON_ONCE(asid != 0);
--              return __sme_pa(pgd);
-+              return __pa(pgd);
-       }
- }
- 
- static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
- {
-       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
--      return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH;
-+      VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
-+      return __pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
- }
- 
- #ifdef CONFIG_PARAVIRT
--- 
-2.14.2
-
diff --git a/patches/kernel/0183-x86-mm-Remove-hard-coded-ASID-limit-checks.patch b/patches/kernel/0183-x86-mm-Remove-hard-coded-ASID-limit-checks.patch

new file mode 100644 (file)

index 0000000..e6c5079
--- /dev/null
+++ b/patches/kernel/0183-x86-mm-Remove-hard-coded-ASID-limit-checks.patch
@@ -0,0 +1,96 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:55 +0100
+Subject: [PATCH] x86/mm: Remove hard-coded ASID limit checks
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+First, it's nice to remove the magic numbers.
+
+Second, PAGE_TABLE_ISOLATION is going to consume half of the available ASID
+space.  The space is currently unused, but add a comment to spell out this
+new restriction.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit cb0a9144a744e55207e24dcef812f05cd15a499a)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit fd5d001ae73ccd382d4270f53e27dcf61c4e4749)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/tlbflush.h | 20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 3a421b164868..c1c10db4156c 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -68,6 +68,22 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+       return atomic64_inc_return(&mm->context.tlb_gen);
+ }
+ 
++/* There are 12 bits of space for ASIDS in CR3 */
++#define CR3_HW_ASID_BITS              12
++/*
++ * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
++ * user/kernel switches
++ */
++#define PTI_CONSUMED_ASID_BITS                0
++
++#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS)
++/*
++ * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid.  -1 below to account
++ * for them being zero-based.  Another -1 is because ASID 0 is reserved for
++ * use by non-PCID-aware users.
++ */
++#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
++
+ /*
+  * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits.
+  * This serves two purposes.  It prevents a nasty situation in which
+@@ -80,7 +96,7 @@ struct pgd_t;
+ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
+ {
+       if (static_cpu_has(X86_FEATURE_PCID)) {
+-              VM_WARN_ON_ONCE(asid > 4094);
++              VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+               return __sme_pa(pgd) | (asid + 1);
+       } else {
+               VM_WARN_ON_ONCE(asid != 0);
+@@ -90,7 +106,7 @@ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
+ 
+ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
+ {
+-      VM_WARN_ON_ONCE(asid > 4094);
++      VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+       return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH;
+ }
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0184-x86-mm-Create-asm-invpcid.h.patch b/patches/kernel/0184-x86-mm-Create-asm-invpcid.h.patch

deleted file mode 100644 (file)

index 78cf317..0000000
--- a/patches/kernel/0184-x86-mm-Create-asm-invpcid.h.patch
+++ /dev/null
@@ -1,168 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Tue, 5 Dec 2017 13:34:47 +0100
-Subject: [PATCH] x86/mm: Create asm/invpcid.h
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Unclutter tlbflush.h a little.
-
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 1a3b0caeb77edeac5ce5fa05e6a61c474c9a9745)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 5af02a8c43ce521f460891f6ba68af69428abe90)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/invpcid.h  | 53 +++++++++++++++++++++++++++++++++++++++++
- arch/x86/include/asm/tlbflush.h | 49 +------------------------------------
- 2 files changed, 54 insertions(+), 48 deletions(-)
- create mode 100644 arch/x86/include/asm/invpcid.h
-
-diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
-new file mode 100644
-index 000000000000..989cfa86de85
---- /dev/null
-+++ b/arch/x86/include/asm/invpcid.h
-@@ -0,0 +1,53 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _ASM_X86_INVPCID
-+#define _ASM_X86_INVPCID
-+
-+static inline void __invpcid(unsigned long pcid, unsigned long addr,
-+                           unsigned long type)
-+{
-+      struct { u64 d[2]; } desc = { { pcid, addr } };
-+
-+      /*
-+       * The memory clobber is because the whole point is to invalidate
-+       * stale TLB entries and, especially if we're flushing global
-+       * mappings, we don't want the compiler to reorder any subsequent
-+       * memory accesses before the TLB flush.
-+       *
-+       * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
-+       * invpcid (%rcx), %rax in long mode.
-+       */
-+      asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
-+                    : : "m" (desc), "a" (type), "c" (&desc) : "memory");
-+}
-+
-+#define INVPCID_TYPE_INDIV_ADDR               0
-+#define INVPCID_TYPE_SINGLE_CTXT      1
-+#define INVPCID_TYPE_ALL_INCL_GLOBAL  2
-+#define INVPCID_TYPE_ALL_NON_GLOBAL   3
-+
-+/* Flush all mappings for a given pcid and addr, not including globals. */
-+static inline void invpcid_flush_one(unsigned long pcid,
-+                                   unsigned long addr)
-+{
-+      __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
-+}
-+
-+/* Flush all mappings for a given PCID, not including globals. */
-+static inline void invpcid_flush_single_context(unsigned long pcid)
-+{
-+      __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
-+}
-+
-+/* Flush all mappings, including globals, for all PCIDs. */
-+static inline void invpcid_flush_all(void)
-+{
-+      __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
-+}
-+
-+/* Flush all mappings for all PCIDs except globals. */
-+static inline void invpcid_flush_all_nonglobals(void)
-+{
-+      __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
-+}
-+
-+#endif /* _ASM_X86_INVPCID */
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index ecd634f87e4e..503f87c30c15 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -8,54 +8,7 @@
- #include <asm/cpufeature.h>
- #include <asm/special_insns.h>
- #include <asm/smp.h>
--
--static inline void __invpcid(unsigned long pcid, unsigned long addr,
--                           unsigned long type)
--{
--      struct { u64 d[2]; } desc = { { pcid, addr } };
--
--      /*
--       * The memory clobber is because the whole point is to invalidate
--       * stale TLB entries and, especially if we're flushing global
--       * mappings, we don't want the compiler to reorder any subsequent
--       * memory accesses before the TLB flush.
--       *
--       * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
--       * invpcid (%rcx), %rax in long mode.
--       */
--      asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
--                    : : "m" (desc), "a" (type), "c" (&desc) : "memory");
--}
--
--#define INVPCID_TYPE_INDIV_ADDR               0
--#define INVPCID_TYPE_SINGLE_CTXT      1
--#define INVPCID_TYPE_ALL_INCL_GLOBAL  2
--#define INVPCID_TYPE_ALL_NON_GLOBAL   3
--
--/* Flush all mappings for a given pcid and addr, not including globals. */
--static inline void invpcid_flush_one(unsigned long pcid,
--                                   unsigned long addr)
--{
--      __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
--}
--
--/* Flush all mappings for a given PCID, not including globals. */
--static inline void invpcid_flush_single_context(unsigned long pcid)
--{
--      __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
--}
--
--/* Flush all mappings, including globals, for all PCIDs. */
--static inline void invpcid_flush_all(void)
--{
--      __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
--}
--
--/* Flush all mappings for all PCIDs except globals. */
--static inline void invpcid_flush_all_nonglobals(void)
--{
--      __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
--}
-+#include <asm/invpcid.h>
- 
- static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
- {
--- 
-2.14.2
-
diff --git a/patches/kernel/0184-x86-mm-Put-MMU-to-hardware-ASID-translation-in-one-p.patch b/patches/kernel/0184-x86-mm-Put-MMU-to-hardware-ASID-translation-in-one-p.patch

new file mode 100644 (file)

index 0000000..51ab520
--- /dev/null
+++ b/patches/kernel/0184-x86-mm-Put-MMU-to-hardware-ASID-translation-in-one-p.patch
@@ -0,0 +1,109 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:56 +0100
+Subject: [PATCH] x86/mm: Put MMU to hardware ASID translation in one place
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+There are effectively two ASID types:
+
+ 1. The one stored in the mmu_context that goes from 0..5
+ 2. The one programmed into the hardware that goes from 1..6
+
+This consolidates the locations where converting between the two (by doing
+a +1) to a single place which gives us a nice place to comment.
+PAGE_TABLE_ISOLATION will also need to, given an ASID, know which hardware
+ASID to flush for the userspace mapping.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit dd95f1a4b5ca904c78e6a097091eb21436478abb)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 6f3e88a8f41123ac339d28cfdda5da0e85bec550)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/tlbflush.h | 31 +++++++++++++++++++------------
+ 1 file changed, 19 insertions(+), 12 deletions(-)
+
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index c1c10db4156c..ecd634f87e4e 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -84,30 +84,37 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+  */
+ #define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
+ 
+-/*
+- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits.
+- * This serves two purposes.  It prevents a nasty situation in which
+- * PCID-unaware code saves CR3, loads some other value (with PCID == 0),
+- * and then restores CR3, thus corrupting the TLB for ASID 0 if the saved
+- * ASID was nonzero.  It also means that any bugs involving loading a
+- * PCID-enabled CR3 with CR4.PCIDE off will trigger deterministically.
+- */
++static inline u16 kern_pcid(u16 asid)
++{
++      VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
++      /*
++       * If PCID is on, ASID-aware code paths put the ASID+1 into the
++       * PCID bits.  This serves two purposes.  It prevents a nasty
++       * situation in which PCID-unaware code saves CR3, loads some other
++       * value (with PCID == 0), and then restores CR3, thus corrupting
++       * the TLB for ASID 0 if the saved ASID was nonzero.  It also means
++       * that any bugs involving loading a PCID-enabled CR3 with
++       * CR4.PCIDE off will trigger deterministically.
++       */
++      return asid + 1;
++}
++
+ struct pgd_t;
+ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
+ {
+       if (static_cpu_has(X86_FEATURE_PCID)) {
+-              VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+-              return __sme_pa(pgd) | (asid + 1);
++              return __pa(pgd) | kern_pcid(asid);
+       } else {
+               VM_WARN_ON_ONCE(asid != 0);
+-              return __sme_pa(pgd);
++              return __pa(pgd);
+       }
+ }
+ 
+ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
+ {
+       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+-      return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH;
++      VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
++      return __pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
+ }
+ 
+ #ifdef CONFIG_PARAVIRT
+-- 
+2.14.2
+
diff --git a/patches/kernel/0185-x86-cpu_entry_area-Move-it-to-a-separate-unit.patch b/patches/kernel/0185-x86-cpu_entry_area-Move-it-to-a-separate-unit.patch

deleted file mode 100644 (file)

index 84983a3..0000000
--- a/patches/kernel/0185-x86-cpu_entry_area-Move-it-to-a-separate-unit.patch
+++ /dev/null
@@ -1,400 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 20 Dec 2017 18:28:54 +0100
-Subject: [PATCH] x86/cpu_entry_area: Move it to a separate unit
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Separate the cpu_entry_area code out of cpu/common.c and the fixmap.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit ed1bbc40a0d10e0c5c74fe7bdc6298295cf40255)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 0fa11d2cd3d67af676aa2762ade282ba6d09cbe5)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/Makefile                  |   2 +-
- arch/x86/include/asm/cpu_entry_area.h |  52 +++++++++++++++++
- arch/x86/include/asm/fixmap.h         |  41 +-------------
- arch/x86/kernel/cpu/common.c          |  94 ------------------------------
- arch/x86/kernel/traps.c               |   1 +
- arch/x86/mm/cpu_entry_area.c          | 104 ++++++++++++++++++++++++++++++++++
- 6 files changed, 159 insertions(+), 135 deletions(-)
- create mode 100644 arch/x86/include/asm/cpu_entry_area.h
- create mode 100644 arch/x86/mm/cpu_entry_area.c
-
-diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
-index 0fbdcb64f9f8..76f5399a8356 100644
---- a/arch/x86/mm/Makefile
-+++ b/arch/x86/mm/Makefile
-@@ -2,7 +2,7 @@
- KCOV_INSTRUMENT_tlb.o := n
- 
- obj-y :=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
--          pat.o pgtable.o physaddr.o setup_nx.o tlb.o
-+          pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
- 
- # Make sure __phys_addr has no stackprotector
- nostackp := $(call cc-option, -fno-stack-protector)
-diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
-new file mode 100644
-index 000000000000..5471826803af
---- /dev/null
-+++ b/arch/x86/include/asm/cpu_entry_area.h
-@@ -0,0 +1,52 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#ifndef _ASM_X86_CPU_ENTRY_AREA_H
-+#define _ASM_X86_CPU_ENTRY_AREA_H
-+
-+#include <linux/percpu-defs.h>
-+#include <asm/processor.h>
-+
-+/*
-+ * cpu_entry_area is a percpu region that contains things needed by the CPU
-+ * and early entry/exit code.  Real types aren't used for all fields here
-+ * to avoid circular header dependencies.
-+ *
-+ * Every field is a virtual alias of some other allocated backing store.
-+ * There is no direct allocation of a struct cpu_entry_area.
-+ */
-+struct cpu_entry_area {
-+      char gdt[PAGE_SIZE];
-+
-+      /*
-+       * The GDT is just below entry_stack and thus serves (on x86_64) as
-+       * a a read-only guard page.
-+       */
-+      struct entry_stack_page entry_stack_page;
-+
-+      /*
-+       * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
-+       * we need task switches to work, and task switches write to the TSS.
-+       */
-+      struct tss_struct tss;
-+
-+      char entry_trampoline[PAGE_SIZE];
-+
-+#ifdef CONFIG_X86_64
-+      /*
-+       * Exception stacks used for IST entries.
-+       *
-+       * In the future, this should have a separate slot for each stack
-+       * with guard pages between them.
-+       */
-+      char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
-+#endif
-+};
-+
-+#define CPU_ENTRY_AREA_SIZE   (sizeof(struct cpu_entry_area))
-+#define CPU_ENTRY_AREA_PAGES  (CPU_ENTRY_AREA_SIZE / PAGE_SIZE)
-+
-+DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
-+
-+extern void setup_cpu_entry_areas(void);
-+
-+#endif
-diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
-index a7fb137ad964..1b2521473480 100644
---- a/arch/x86/include/asm/fixmap.h
-+++ b/arch/x86/include/asm/fixmap.h
-@@ -25,6 +25,7 @@
- #else
- #include <uapi/asm/vsyscall.h>
- #endif
-+#include <asm/cpu_entry_area.h>
- 
- /*
-  * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
-@@ -44,46 +45,6 @@ extern unsigned long __FIXADDR_TOP;
-                        PAGE_SIZE)
- #endif
- 
--/*
-- * cpu_entry_area is a percpu region in the fixmap that contains things
-- * needed by the CPU and early entry/exit code.  Real types aren't used
-- * for all fields here to avoid circular header dependencies.
-- *
-- * Every field is a virtual alias of some other allocated backing store.
-- * There is no direct allocation of a struct cpu_entry_area.
-- */
--struct cpu_entry_area {
--      char gdt[PAGE_SIZE];
--
--      /*
--       * The GDT is just below entry_stack and thus serves (on x86_64) as
--       * a a read-only guard page.
--       */
--      struct entry_stack_page entry_stack_page;
--
--      /*
--       * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
--       * we need task switches to work, and task switches write to the TSS.
--       */
--      struct tss_struct tss;
--
--      char entry_trampoline[PAGE_SIZE];
--
--#ifdef CONFIG_X86_64
--      /*
--       * Exception stacks used for IST entries.
--       *
--       * In the future, this should have a separate slot for each stack
--       * with guard pages between them.
--       */
--      char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
--#endif
--};
--
--#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
--
--extern void setup_cpu_entry_areas(void);
--
- /*
-  * Here we define all the compile-time 'special' virtual
-  * addresses. The point is to have a constant address at
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 7a8a5d436566..96171ce46d61 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -482,102 +482,8 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
-         [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
-         [DEBUG_STACK - 1]                     = DEBUG_STKSZ
- };
--
--static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
--      [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
--#endif
--
--static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page,
--                                 entry_stack_storage);
--
--static void __init
--set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
--{
--      for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
--              __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
--}
--
--/* Setup the fixmap mappings only once per-processor */
--static void __init setup_cpu_entry_area(int cpu)
--{
--#ifdef CONFIG_X86_64
--      extern char _entry_trampoline[];
--
--      /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
--      pgprot_t gdt_prot = PAGE_KERNEL_RO;
--      pgprot_t tss_prot = PAGE_KERNEL_RO;
--#else
--      /*
--       * On native 32-bit systems, the GDT cannot be read-only because
--       * our double fault handler uses a task gate, and entering through
--       * a task gate needs to change an available TSS to busy.  If the
--       * GDT is read-only, that will triple fault.  The TSS cannot be
--       * read-only because the CPU writes to it on task switches.
--       *
--       * On Xen PV, the GDT must be read-only because the hypervisor
--       * requires it.
--       */
--      pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
--              PAGE_KERNEL_RO : PAGE_KERNEL;
--      pgprot_t tss_prot = PAGE_KERNEL;
--#endif
--
--      __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
--      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
--                              per_cpu_ptr(&entry_stack_storage, cpu), 1,
--                              PAGE_KERNEL);
--
--      /*
--       * The Intel SDM says (Volume 3, 7.2.1):
--       *
--       *  Avoid placing a page boundary in the part of the TSS that the
--       *  processor reads during a task switch (the first 104 bytes). The
--       *  processor may not correctly perform address translations if a
--       *  boundary occurs in this area. During a task switch, the processor
--       *  reads and writes into the first 104 bytes of each TSS (using
--       *  contiguous physical addresses beginning with the physical address
--       *  of the first byte of the TSS). So, after TSS access begins, if
--       *  part of the 104 bytes is not physically contiguous, the processor
--       *  will access incorrect information without generating a page-fault
--       *  exception.
--       *
--       * There are also a lot of errata involving the TSS spanning a page
--       * boundary.  Assert that we're not doing that.
--       */
--      BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
--                    offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
--      BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
--      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
--                              &per_cpu(cpu_tss_rw, cpu),
--                              sizeof(struct tss_struct) / PAGE_SIZE,
--                              tss_prot);
--
--#ifdef CONFIG_X86_32
--      per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
- #endif
- 
--#ifdef CONFIG_X86_64
--      BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
--      BUILD_BUG_ON(sizeof(exception_stacks) !=
--                   sizeof(((struct cpu_entry_area *)0)->exception_stacks));
--      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
--                              &per_cpu(exception_stacks, cpu),
--                              sizeof(exception_stacks) / PAGE_SIZE,
--                              PAGE_KERNEL);
--
--      __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
--                   __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
--#endif
--}
--
--void __init setup_cpu_entry_areas(void)
--{
--      unsigned int cpu;
--
--      for_each_possible_cpu(cpu)
--              setup_cpu_entry_area(cpu);
--}
--
- /* Load the original GDT from the per-cpu structure */
- void load_direct_gdt(int cpu)
- {
-diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
-index 14b462eefa17..ef2d1b8a0516 100644
---- a/arch/x86/kernel/traps.c
-+++ b/arch/x86/kernel/traps.c
-@@ -57,6 +57,7 @@
- #include <asm/traps.h>
- #include <asm/desc.h>
- #include <asm/fpu/internal.h>
-+#include <asm/cpu_entry_area.h>
- #include <asm/mce.h>
- #include <asm/fixmap.h>
- #include <asm/mach_traps.h>
-diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
-new file mode 100644
-index 000000000000..235ff9cfaaf4
---- /dev/null
-+++ b/arch/x86/mm/cpu_entry_area.c
-@@ -0,0 +1,104 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include <linux/spinlock.h>
-+#include <linux/percpu.h>
-+
-+#include <asm/cpu_entry_area.h>
-+#include <asm/pgtable.h>
-+#include <asm/fixmap.h>
-+#include <asm/desc.h>
-+
-+static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
-+
-+#ifdef CONFIG_X86_64
-+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-+      [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-+#endif
-+
-+static void __init
-+set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
-+{
-+      for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
-+              __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
-+}
-+
-+/* Setup the fixmap mappings only once per-processor */
-+static void __init setup_cpu_entry_area(int cpu)
-+{
-+#ifdef CONFIG_X86_64
-+      extern char _entry_trampoline[];
-+
-+      /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
-+      pgprot_t gdt_prot = PAGE_KERNEL_RO;
-+      pgprot_t tss_prot = PAGE_KERNEL_RO;
-+#else
-+      /*
-+       * On native 32-bit systems, the GDT cannot be read-only because
-+       * our double fault handler uses a task gate, and entering through
-+       * a task gate needs to change an available TSS to busy.  If the
-+       * GDT is read-only, that will triple fault.  The TSS cannot be
-+       * read-only because the CPU writes to it on task switches.
-+       *
-+       * On Xen PV, the GDT must be read-only because the hypervisor
-+       * requires it.
-+       */
-+      pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
-+              PAGE_KERNEL_RO : PAGE_KERNEL;
-+      pgprot_t tss_prot = PAGE_KERNEL;
-+#endif
-+
-+      __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
-+      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
-+                              per_cpu_ptr(&entry_stack_storage, cpu), 1,
-+                              PAGE_KERNEL);
-+
-+      /*
-+       * The Intel SDM says (Volume 3, 7.2.1):
-+       *
-+       *  Avoid placing a page boundary in the part of the TSS that the
-+       *  processor reads during a task switch (the first 104 bytes). The
-+       *  processor may not correctly perform address translations if a
-+       *  boundary occurs in this area. During a task switch, the processor
-+       *  reads and writes into the first 104 bytes of each TSS (using
-+       *  contiguous physical addresses beginning with the physical address
-+       *  of the first byte of the TSS). So, after TSS access begins, if
-+       *  part of the 104 bytes is not physically contiguous, the processor
-+       *  will access incorrect information without generating a page-fault
-+       *  exception.
-+       *
-+       * There are also a lot of errata involving the TSS spanning a page
-+       * boundary.  Assert that we're not doing that.
-+       */
-+      BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
-+                    offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
-+      BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
-+      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
-+                              &per_cpu(cpu_tss_rw, cpu),
-+                              sizeof(struct tss_struct) / PAGE_SIZE,
-+                              tss_prot);
-+
-+#ifdef CONFIG_X86_32
-+      per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
-+#endif
-+
-+#ifdef CONFIG_X86_64
-+      BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
-+      BUILD_BUG_ON(sizeof(exception_stacks) !=
-+                   sizeof(((struct cpu_entry_area *)0)->exception_stacks));
-+      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
-+                              &per_cpu(exception_stacks, cpu),
-+                              sizeof(exception_stacks) / PAGE_SIZE,
-+                              PAGE_KERNEL);
-+
-+      __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
-+                   __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
-+#endif
-+}
-+
-+void __init setup_cpu_entry_areas(void)
-+{
-+      unsigned int cpu;
-+
-+      for_each_possible_cpu(cpu)
-+              setup_cpu_entry_area(cpu);
-+}
--- 
-2.14.2
-
diff --git a/patches/kernel/0185-x86-mm-Create-asm-invpcid.h.patch b/patches/kernel/0185-x86-mm-Create-asm-invpcid.h.patch

new file mode 100644 (file)

index 0000000..78cf317
--- /dev/null
+++ b/patches/kernel/0185-x86-mm-Create-asm-invpcid.h.patch
@@ -0,0 +1,168 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:47 +0100
+Subject: [PATCH] x86/mm: Create asm/invpcid.h
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Unclutter tlbflush.h a little.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 1a3b0caeb77edeac5ce5fa05e6a61c474c9a9745)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 5af02a8c43ce521f460891f6ba68af69428abe90)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/invpcid.h  | 53 +++++++++++++++++++++++++++++++++++++++++
+ arch/x86/include/asm/tlbflush.h | 49 +------------------------------------
+ 2 files changed, 54 insertions(+), 48 deletions(-)
+ create mode 100644 arch/x86/include/asm/invpcid.h
+
+diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
+new file mode 100644
+index 000000000000..989cfa86de85
+--- /dev/null
++++ b/arch/x86/include/asm/invpcid.h
+@@ -0,0 +1,53 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _ASM_X86_INVPCID
++#define _ASM_X86_INVPCID
++
++static inline void __invpcid(unsigned long pcid, unsigned long addr,
++                           unsigned long type)
++{
++      struct { u64 d[2]; } desc = { { pcid, addr } };
++
++      /*
++       * The memory clobber is because the whole point is to invalidate
++       * stale TLB entries and, especially if we're flushing global
++       * mappings, we don't want the compiler to reorder any subsequent
++       * memory accesses before the TLB flush.
++       *
++       * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
++       * invpcid (%rcx), %rax in long mode.
++       */
++      asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
++                    : : "m" (desc), "a" (type), "c" (&desc) : "memory");
++}
++
++#define INVPCID_TYPE_INDIV_ADDR               0
++#define INVPCID_TYPE_SINGLE_CTXT      1
++#define INVPCID_TYPE_ALL_INCL_GLOBAL  2
++#define INVPCID_TYPE_ALL_NON_GLOBAL   3
++
++/* Flush all mappings for a given pcid and addr, not including globals. */
++static inline void invpcid_flush_one(unsigned long pcid,
++                                   unsigned long addr)
++{
++      __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
++}
++
++/* Flush all mappings for a given PCID, not including globals. */
++static inline void invpcid_flush_single_context(unsigned long pcid)
++{
++      __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
++}
++
++/* Flush all mappings, including globals, for all PCIDs. */
++static inline void invpcid_flush_all(void)
++{
++      __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
++}
++
++/* Flush all mappings for all PCIDs except globals. */
++static inline void invpcid_flush_all_nonglobals(void)
++{
++      __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
++}
++
++#endif /* _ASM_X86_INVPCID */
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index ecd634f87e4e..503f87c30c15 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -8,54 +8,7 @@
+ #include <asm/cpufeature.h>
+ #include <asm/special_insns.h>
+ #include <asm/smp.h>
+-
+-static inline void __invpcid(unsigned long pcid, unsigned long addr,
+-                           unsigned long type)
+-{
+-      struct { u64 d[2]; } desc = { { pcid, addr } };
+-
+-      /*
+-       * The memory clobber is because the whole point is to invalidate
+-       * stale TLB entries and, especially if we're flushing global
+-       * mappings, we don't want the compiler to reorder any subsequent
+-       * memory accesses before the TLB flush.
+-       *
+-       * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+-       * invpcid (%rcx), %rax in long mode.
+-       */
+-      asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+-                    : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+-}
+-
+-#define INVPCID_TYPE_INDIV_ADDR               0
+-#define INVPCID_TYPE_SINGLE_CTXT      1
+-#define INVPCID_TYPE_ALL_INCL_GLOBAL  2
+-#define INVPCID_TYPE_ALL_NON_GLOBAL   3
+-
+-/* Flush all mappings for a given pcid and addr, not including globals. */
+-static inline void invpcid_flush_one(unsigned long pcid,
+-                                   unsigned long addr)
+-{
+-      __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+-}
+-
+-/* Flush all mappings for a given PCID, not including globals. */
+-static inline void invpcid_flush_single_context(unsigned long pcid)
+-{
+-      __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+-}
+-
+-/* Flush all mappings, including globals, for all PCIDs. */
+-static inline void invpcid_flush_all(void)
+-{
+-      __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+-}
+-
+-/* Flush all mappings for all PCIDs except globals. */
+-static inline void invpcid_flush_all_nonglobals(void)
+-{
+-      __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+-}
++#include <asm/invpcid.h>
+ 
+ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+ {
+-- 
+2.14.2
+
diff --git a/patches/kernel/0186-x86-cpu_entry_area-Move-it-out-of-the-fixmap.patch b/patches/kernel/0186-x86-cpu_entry_area-Move-it-out-of-the-fixmap.patch

deleted file mode 100644 (file)

index 726fe13..0000000
--- a/patches/kernel/0186-x86-cpu_entry_area-Move-it-out-of-the-fixmap.patch
+++ /dev/null
@@ -1,588 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 20 Dec 2017 18:51:31 +0100
-Subject: [PATCH] x86/cpu_entry_area: Move it out of the fixmap
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Put the cpu_entry_area into a separate P4D entry. The fixmap gets too big
-and 0-day already hit a case where the fixmap PTEs were cleared by
-cleanup_highmap().
-
-Aside of that the fixmap API is a pain as it's all backwards.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: linux-kernel@vger.kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit 92a0f81d89571e3e8759366e050ee05cc545ef99)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit bda9eb328d9ce3757f22794f79da73dd5886c93a)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/x86/x86_64/mm.txt         |  2 +
- arch/x86/include/asm/cpu_entry_area.h   | 18 ++++++++-
- arch/x86/include/asm/desc.h             |  2 +
- arch/x86/include/asm/fixmap.h           | 32 +---------------
- arch/x86/include/asm/pgtable_32_types.h | 15 ++++++--
- arch/x86/include/asm/pgtable_64_types.h | 47 +++++++++++++----------
- arch/x86/kernel/dumpstack.c             |  1 +
- arch/x86/kernel/traps.c                 |  5 ++-
- arch/x86/mm/cpu_entry_area.c            | 66 +++++++++++++++++++++++++--------
- arch/x86/mm/dump_pagetables.c           |  6 ++-
- arch/x86/mm/init_32.c                   |  6 +++
- arch/x86/mm/kasan_init_64.c             | 30 ++++++++-------
- arch/x86/mm/pgtable_32.c                |  1 +
- arch/x86/xen/mmu_pv.c                   |  2 -
- 14 files changed, 145 insertions(+), 88 deletions(-)
-
-diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
-index 63a41671d25b..51101708a03a 100644
---- a/Documentation/x86/x86_64/mm.txt
-+++ b/Documentation/x86/x86_64/mm.txt
-@@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
- ... unused hole ...
- ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
- ... unused hole ...
-+fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
- ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
- ... unused hole ...
- ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
-@@ -35,6 +36,7 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
- ... unused hole ...
- ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
- ... unused hole ...
-+fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
- ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
- ... unused hole ...
- ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
-diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
-index 5471826803af..2fbc69a0916e 100644
---- a/arch/x86/include/asm/cpu_entry_area.h
-+++ b/arch/x86/include/asm/cpu_entry_area.h
-@@ -43,10 +43,26 @@ struct cpu_entry_area {
- };
- 
- #define CPU_ENTRY_AREA_SIZE   (sizeof(struct cpu_entry_area))
--#define CPU_ENTRY_AREA_PAGES  (CPU_ENTRY_AREA_SIZE / PAGE_SIZE)
-+#define CPU_ENTRY_AREA_TOT_SIZE       (CPU_ENTRY_AREA_SIZE * NR_CPUS)
- 
- DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
- 
- extern void setup_cpu_entry_areas(void);
-+extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
-+
-+#define       CPU_ENTRY_AREA_RO_IDT           CPU_ENTRY_AREA_BASE
-+#define CPU_ENTRY_AREA_PER_CPU                (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
-+
-+#define CPU_ENTRY_AREA_RO_IDT_VADDR   ((void *)CPU_ENTRY_AREA_RO_IDT)
-+
-+#define CPU_ENTRY_AREA_MAP_SIZE                       \
-+      (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
-+
-+extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
-+
-+static inline struct entry_stack *cpu_entry_stack(int cpu)
-+{
-+      return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
-+}
- 
- #endif
-diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
-index b817fe247506..de40c514ba25 100644
---- a/arch/x86/include/asm/desc.h
-+++ b/arch/x86/include/asm/desc.h
-@@ -5,6 +5,8 @@
- #include <asm/ldt.h>
- #include <asm/mmu.h>
- #include <asm/fixmap.h>
-+#include <asm/pgtable.h>
-+#include <asm/cpu_entry_area.h>
- 
- #include <linux/smp.h>
- #include <linux/percpu.h>
-diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
-index 1b2521473480..a6ff9e1a6189 100644
---- a/arch/x86/include/asm/fixmap.h
-+++ b/arch/x86/include/asm/fixmap.h
-@@ -25,7 +25,6 @@
- #else
- #include <uapi/asm/vsyscall.h>
- #endif
--#include <asm/cpu_entry_area.h>
- 
- /*
-  * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
-@@ -84,7 +83,6 @@ enum fixed_addresses {
-       FIX_IO_APIC_BASE_0,
-       FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
- #endif
--      FIX_RO_IDT,     /* Virtual mapping for read-only IDT */
- #ifdef CONFIG_X86_32
-       FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
-       FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
-@@ -100,9 +98,6 @@ enum fixed_addresses {
- #ifdef        CONFIG_X86_INTEL_MID
-       FIX_LNW_VRTC,
- #endif
--      /* Fixmap entries to remap the GDTs, one per processor. */
--      FIX_CPU_ENTRY_AREA_TOP,
--      FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
- 
- #ifdef CONFIG_ACPI_APEI_GHES
-       /* Used for GHES mapping from assorted contexts */
-@@ -143,7 +138,7 @@ enum fixed_addresses {
- extern void reserve_top_address(unsigned long reserve);
- 
- #define FIXADDR_SIZE  (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
--#define FIXADDR_START         (FIXADDR_TOP - FIXADDR_SIZE)
-+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
- 
- extern int fixmaps_set;
- 
-@@ -171,30 +166,5 @@ static inline void __set_fixmap(enum fixed_addresses idx,
- void __early_set_fixmap(enum fixed_addresses idx,
-                       phys_addr_t phys, pgprot_t flags);
- 
--static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
--{
--      BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
--
--      return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
--}
--
--#define __get_cpu_entry_area_offset_index(cpu, offset) ({             \
--      BUILD_BUG_ON(offset % PAGE_SIZE != 0);                          \
--      __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE);       \
--      })
--
--#define get_cpu_entry_area_index(cpu, field)                          \
--      __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
--
--static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
--{
--      return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
--}
--
--static inline struct entry_stack *cpu_entry_stack(int cpu)
--{
--      return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
--}
--
- #endif /* !__ASSEMBLY__ */
- #endif /* _ASM_X86_FIXMAP_H */
-diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
-index 9fb2f2bc8245..67b60e11b70d 100644
---- a/arch/x86/include/asm/pgtable_32_types.h
-+++ b/arch/x86/include/asm/pgtable_32_types.h
-@@ -37,13 +37,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
- #define LAST_PKMAP 1024
- #endif
- 
--#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1))    \
--                  & PMD_MASK)
-+/*
-+ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
-+ * to avoid include recursion hell
-+ */
-+#define CPU_ENTRY_AREA_PAGES  (NR_CPUS * 40)
-+
-+#define CPU_ENTRY_AREA_BASE                           \
-+      ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
-+
-+#define PKMAP_BASE            \
-+      ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
- 
- #ifdef CONFIG_HIGHMEM
- # define VMALLOC_END  (PKMAP_BASE - 2 * PAGE_SIZE)
- #else
--# define VMALLOC_END  (FIXADDR_START - 2 * PAGE_SIZE)
-+# define VMALLOC_END  (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
- #endif
- 
- #define MODULES_VADDR VMALLOC_START
-diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
-index 06470da156ba..42e2750da525 100644
---- a/arch/x86/include/asm/pgtable_64_types.h
-+++ b/arch/x86/include/asm/pgtable_64_types.h
-@@ -75,32 +75,41 @@ typedef struct { pteval_t pte; } pte_t;
- #define PGDIR_MASK    (~(PGDIR_SIZE - 1))
- 
- /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
--#define MAXMEM                _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
-+#define MAXMEM                        _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
-+
- #ifdef CONFIG_X86_5LEVEL
--#define VMALLOC_SIZE_TB _AC(16384, UL)
--#define __VMALLOC_BASE        _AC(0xff92000000000000, UL)
--#define __VMEMMAP_BASE        _AC(0xffd4000000000000, UL)
-+# define VMALLOC_SIZE_TB      _AC(16384, UL)
-+# define __VMALLOC_BASE               _AC(0xff92000000000000, UL)
-+# define __VMEMMAP_BASE               _AC(0xffd4000000000000, UL)
- #else
--#define VMALLOC_SIZE_TB       _AC(32, UL)
--#define __VMALLOC_BASE        _AC(0xffffc90000000000, UL)
--#define __VMEMMAP_BASE        _AC(0xffffea0000000000, UL)
-+# define VMALLOC_SIZE_TB      _AC(32, UL)
-+# define __VMALLOC_BASE               _AC(0xffffc90000000000, UL)
-+# define __VMEMMAP_BASE               _AC(0xffffea0000000000, UL)
- #endif
-+
- #ifdef CONFIG_RANDOMIZE_MEMORY
--#define VMALLOC_START vmalloc_base
--#define VMEMMAP_START vmemmap_base
-+# define VMALLOC_START                vmalloc_base
-+# define VMEMMAP_START                vmemmap_base
- #else
--#define VMALLOC_START __VMALLOC_BASE
--#define VMEMMAP_START __VMEMMAP_BASE
-+# define VMALLOC_START                __VMALLOC_BASE
-+# define VMEMMAP_START                __VMEMMAP_BASE
- #endif /* CONFIG_RANDOMIZE_MEMORY */
--#define VMALLOC_END   (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
--#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
-+
-+#define VMALLOC_END           (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
-+
-+#define MODULES_VADDR         (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
- /* The module sections ends with the start of the fixmap */
--#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
--#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
--#define ESPFIX_PGD_ENTRY _AC(-2, UL)
--#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
--#define EFI_VA_START   ( -4 * (_AC(1, UL) << 30))
--#define EFI_VA_END     (-68 * (_AC(1, UL) << 30))
-+#define MODULES_END           __fix_to_virt(__end_of_fixed_addresses + 1)
-+#define MODULES_LEN           (MODULES_END - MODULES_VADDR)
-+
-+#define ESPFIX_PGD_ENTRY      _AC(-2, UL)
-+#define ESPFIX_BASE_ADDR      (ESPFIX_PGD_ENTRY << P4D_SHIFT)
-+
-+#define CPU_ENTRY_AREA_PGD    _AC(-3, UL)
-+#define CPU_ENTRY_AREA_BASE   (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
-+
-+#define EFI_VA_START          ( -4 * (_AC(1, UL) << 30))
-+#define EFI_VA_END            (-68 * (_AC(1, UL) << 30))
- 
- #define EARLY_DYNAMIC_PAGE_TABLES     64
- 
-diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
-index 55bf1c3b5319..2bdeb983b9d8 100644
---- a/arch/x86/kernel/dumpstack.c
-+++ b/arch/x86/kernel/dumpstack.c
-@@ -18,6 +18,7 @@
- #include <linux/nmi.h>
- #include <linux/sysfs.h>
- 
-+#include <asm/cpu_entry_area.h>
- #include <asm/stacktrace.h>
- #include <asm/unwind.h>
- 
-diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
-index ef2d1b8a0516..5808ccb59266 100644
---- a/arch/x86/kernel/traps.c
-+++ b/arch/x86/kernel/traps.c
-@@ -1041,8 +1041,9 @@ void __init trap_init(void)
-        * "sidt" instruction will not leak the location of the kernel, and
-        * to defend the IDT against arbitrary memory write vulnerabilities.
-        * It will be reloaded in cpu_init() */
--      __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
--      idt_descr.address = fix_to_virt(FIX_RO_IDT);
-+      cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
-+                  PAGE_KERNEL_RO);
-+      idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
- 
-       /*
-        * Should be a barrier for any external CPU state:
-diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
-index 235ff9cfaaf4..21e8b595cbb1 100644
---- a/arch/x86/mm/cpu_entry_area.c
-+++ b/arch/x86/mm/cpu_entry_area.c
-@@ -15,11 +15,27 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
- #endif
- 
-+struct cpu_entry_area *get_cpu_entry_area(int cpu)
-+{
-+      unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
-+      BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
-+
-+      return (struct cpu_entry_area *) va;
-+}
-+EXPORT_SYMBOL(get_cpu_entry_area);
-+
-+void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
-+{
-+      unsigned long va = (unsigned long) cea_vaddr;
-+
-+      set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
-+}
-+
- static void __init
--set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
-+cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
- {
--      for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
--              __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
-+      for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
-+              cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
- }
- 
- /* Setup the fixmap mappings only once per-processor */
-@@ -47,10 +63,12 @@ static void __init setup_cpu_entry_area(int cpu)
-       pgprot_t tss_prot = PAGE_KERNEL;
- #endif
- 
--      __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
--      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
--                              per_cpu_ptr(&entry_stack_storage, cpu), 1,
--                              PAGE_KERNEL);
-+      cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
-+                  gdt_prot);
-+
-+      cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
-+                           per_cpu_ptr(&entry_stack_storage, cpu), 1,
-+                           PAGE_KERNEL);
- 
-       /*
-        * The Intel SDM says (Volume 3, 7.2.1):
-@@ -72,10 +90,9 @@ static void __init setup_cpu_entry_area(int cpu)
-       BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
-                     offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
-       BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
--      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
--                              &per_cpu(cpu_tss_rw, cpu),
--                              sizeof(struct tss_struct) / PAGE_SIZE,
--                              tss_prot);
-+      cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
-+                           &per_cpu(cpu_tss_rw, cpu),
-+                           sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
- 
- #ifdef CONFIG_X86_32
-       per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
-@@ -85,20 +102,37 @@ static void __init setup_cpu_entry_area(int cpu)
-       BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
-       BUILD_BUG_ON(sizeof(exception_stacks) !=
-                    sizeof(((struct cpu_entry_area *)0)->exception_stacks));
--      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
--                              &per_cpu(exception_stacks, cpu),
--                              sizeof(exception_stacks) / PAGE_SIZE,
--                              PAGE_KERNEL);
-+      cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
-+                           &per_cpu(exception_stacks, cpu),
-+                           sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
- 
--      __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
-+      cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
-                    __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
- #endif
- }
- 
-+static __init void setup_cpu_entry_area_ptes(void)
-+{
-+#ifdef CONFIG_X86_32
-+      unsigned long start, end;
-+
-+      BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
-+      BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
-+
-+      start = CPU_ENTRY_AREA_BASE;
-+      end = start + CPU_ENTRY_AREA_MAP_SIZE;
-+
-+      for (; start < end; start += PMD_SIZE)
-+              populate_extra_pte(start);
-+#endif
-+}
-+
- void __init setup_cpu_entry_areas(void)
- {
-       unsigned int cpu;
- 
-+      setup_cpu_entry_area_ptes();
-+
-       for_each_possible_cpu(cpu)
-               setup_cpu_entry_area(cpu);
- }
-diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
-index 318a7c30e87e..3b7720404a9f 100644
---- a/arch/x86/mm/dump_pagetables.c
-+++ b/arch/x86/mm/dump_pagetables.c
-@@ -58,6 +58,7 @@ enum address_markers_idx {
-       KASAN_SHADOW_START_NR,
-       KASAN_SHADOW_END_NR,
- #endif
-+      CPU_ENTRY_AREA_NR,
- #ifdef CONFIG_X86_ESPFIX64
-       ESPFIX_START_NR,
- #endif
-@@ -81,6 +82,7 @@ static struct addr_marker address_markers[] = {
-       [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
-       [KASAN_SHADOW_END_NR]   = { KASAN_SHADOW_END,   "KASAN shadow end" },
- #endif
-+      [CPU_ENTRY_AREA_NR]     = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
- #ifdef CONFIG_X86_ESPFIX64
-       [ESPFIX_START_NR]       = { ESPFIX_BASE_ADDR,   "ESPfix Area", 16 },
- #endif
-@@ -104,6 +106,7 @@ enum address_markers_idx {
- #ifdef CONFIG_HIGHMEM
-       PKMAP_BASE_NR,
- #endif
-+      CPU_ENTRY_AREA_NR,
-       FIXADDR_START_NR,
-       END_OF_SPACE_NR,
- };
-@@ -116,6 +119,7 @@ static struct addr_marker address_markers[] = {
- #ifdef CONFIG_HIGHMEM
-       [PKMAP_BASE_NR]         = { 0UL,                "Persistent kmap() Area" },
- #endif
-+      [CPU_ENTRY_AREA_NR]     = { 0UL,                "CPU entry area" },
-       [FIXADDR_START_NR]      = { 0UL,                "Fixmap area" },
-       [END_OF_SPACE_NR]       = { -1,                 NULL }
- };
-@@ -522,8 +526,8 @@ static int __init pt_dump_init(void)
-       address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
- # endif
-       address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
-+      address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
- #endif
--
-       return 0;
- }
- __initcall(pt_dump_init);
-diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
-index 8a64a6f2848d..135c9a7898c7 100644
---- a/arch/x86/mm/init_32.c
-+++ b/arch/x86/mm/init_32.c
-@@ -50,6 +50,7 @@
- #include <asm/setup.h>
- #include <asm/set_memory.h>
- #include <asm/page_types.h>
-+#include <asm/cpu_entry_area.h>
- #include <asm/init.h>
- 
- #include "mm_internal.h"
-@@ -766,6 +767,7 @@ void __init mem_init(void)
-       mem_init_print_info(NULL);
-       printk(KERN_INFO "virtual kernel memory layout:\n"
-               "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-+              "  cpu_entry : 0x%08lx - 0x%08lx   (%4ld kB)\n"
- #ifdef CONFIG_HIGHMEM
-               "    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
- #endif
-@@ -777,6 +779,10 @@ void __init mem_init(void)
-               FIXADDR_START, FIXADDR_TOP,
-               (FIXADDR_TOP - FIXADDR_START) >> 10,
- 
-+              CPU_ENTRY_AREA_BASE,
-+              CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
-+              CPU_ENTRY_AREA_MAP_SIZE >> 10,
-+
- #ifdef CONFIG_HIGHMEM
-               PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
-               (LAST_PKMAP*PAGE_SIZE) >> 10,
-diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
-index d8836e45bc07..4cd556a30ee1 100644
---- a/arch/x86/mm/kasan_init_64.c
-+++ b/arch/x86/mm/kasan_init_64.c
-@@ -13,6 +13,8 @@
- #include <asm/pgalloc.h>
- #include <asm/tlbflush.h>
- #include <asm/sections.h>
-+#include <asm/pgtable.h>
-+#include <asm/cpu_entry_area.h>
- 
- extern pgd_t early_top_pgt[PTRS_PER_PGD];
- extern struct range pfn_mapped[E820_MAX_ENTRIES];
-@@ -321,31 +323,33 @@ void __init kasan_init(void)
-               map_range(&pfn_mapped[i]);
-       }
- 
--      kasan_populate_zero_shadow(
--              kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
--              kasan_mem_to_shadow((void *)__START_KERNEL_map));
--
--      kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
--                            (unsigned long)kasan_mem_to_shadow(_end),
--                            early_pfn_to_nid(__pa(_stext)));
--
--      shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
-+      shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
-       shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
-       shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
-                                               PAGE_SIZE);
- 
--      shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
-+      shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
-+                                      CPU_ENTRY_AREA_MAP_SIZE);
-       shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
-       shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
-                                       PAGE_SIZE);
- 
--      kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
--                                 shadow_cpu_entry_begin);
-+      kasan_populate_zero_shadow(
-+              kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
-+              shadow_cpu_entry_begin);
- 
-       kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
-                             (unsigned long)shadow_cpu_entry_end, 0);
- 
--      kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END);
-+      kasan_populate_zero_shadow(shadow_cpu_entry_end,
-+                              kasan_mem_to_shadow((void *)__START_KERNEL_map));
-+
-+      kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
-+                            (unsigned long)kasan_mem_to_shadow(_end),
-+                            early_pfn_to_nid(__pa(_stext)));
-+
-+      kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
-+                              (void *)KASAN_SHADOW_END);
- 
-       load_cr3(init_top_pgt);
-       __flush_tlb_all();
-diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
-index b9bd5b8b14fa..77909bae5943 100644
---- a/arch/x86/mm/pgtable_32.c
-+++ b/arch/x86/mm/pgtable_32.c
-@@ -9,6 +9,7 @@
- #include <linux/pagemap.h>
- #include <linux/spinlock.h>
- 
-+#include <asm/cpu_entry_area.h>
- #include <asm/pgtable.h>
- #include <asm/pgalloc.h>
- #include <asm/fixmap.h>
-diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
-index 53e65f605bdd..cd4b91b8d614 100644
---- a/arch/x86/xen/mmu_pv.c
-+++ b/arch/x86/xen/mmu_pv.c
-@@ -2286,7 +2286,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
- 
-       switch (idx) {
-       case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
--      case FIX_RO_IDT:
- #ifdef CONFIG_X86_32
-       case FIX_WP_TEST:
- # ifdef CONFIG_HIGHMEM
-@@ -2297,7 +2296,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
- #endif
-       case FIX_TEXT_POKE0:
-       case FIX_TEXT_POKE1:
--      case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
-               /* All local page mappings */
-               pte = pfn_pte(phys, prot);
-               break;
--- 
-2.14.2
-
diff --git a/patches/kernel/0186-x86-cpu_entry_area-Move-it-to-a-separate-unit.patch b/patches/kernel/0186-x86-cpu_entry_area-Move-it-to-a-separate-unit.patch

new file mode 100644 (file)

index 0000000..84983a3
--- /dev/null
+++ b/patches/kernel/0186-x86-cpu_entry_area-Move-it-to-a-separate-unit.patch
@@ -0,0 +1,400 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 20 Dec 2017 18:28:54 +0100
+Subject: [PATCH] x86/cpu_entry_area: Move it to a separate unit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Separate the cpu_entry_area code out of cpu/common.c and the fixmap.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit ed1bbc40a0d10e0c5c74fe7bdc6298295cf40255)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 0fa11d2cd3d67af676aa2762ade282ba6d09cbe5)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/Makefile                  |   2 +-
+ arch/x86/include/asm/cpu_entry_area.h |  52 +++++++++++++++++
+ arch/x86/include/asm/fixmap.h         |  41 +-------------
+ arch/x86/kernel/cpu/common.c          |  94 ------------------------------
+ arch/x86/kernel/traps.c               |   1 +
+ arch/x86/mm/cpu_entry_area.c          | 104 ++++++++++++++++++++++++++++++++++
+ 6 files changed, 159 insertions(+), 135 deletions(-)
+ create mode 100644 arch/x86/include/asm/cpu_entry_area.h
+ create mode 100644 arch/x86/mm/cpu_entry_area.c
+
+diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
+index 0fbdcb64f9f8..76f5399a8356 100644
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -2,7 +2,7 @@
+ KCOV_INSTRUMENT_tlb.o := n
+ 
+ obj-y :=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
+-          pat.o pgtable.o physaddr.o setup_nx.o tlb.o
++          pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
+ 
+ # Make sure __phys_addr has no stackprotector
+ nostackp := $(call cc-option, -fno-stack-protector)
+diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
+new file mode 100644
+index 000000000000..5471826803af
+--- /dev/null
++++ b/arch/x86/include/asm/cpu_entry_area.h
+@@ -0,0 +1,52 @@
++// SPDX-License-Identifier: GPL-2.0
++
++#ifndef _ASM_X86_CPU_ENTRY_AREA_H
++#define _ASM_X86_CPU_ENTRY_AREA_H
++
++#include <linux/percpu-defs.h>
++#include <asm/processor.h>
++
++/*
++ * cpu_entry_area is a percpu region that contains things needed by the CPU
++ * and early entry/exit code.  Real types aren't used for all fields here
++ * to avoid circular header dependencies.
++ *
++ * Every field is a virtual alias of some other allocated backing store.
++ * There is no direct allocation of a struct cpu_entry_area.
++ */
++struct cpu_entry_area {
++      char gdt[PAGE_SIZE];
++
++      /*
++       * The GDT is just below entry_stack and thus serves (on x86_64) as
++       * a read-only guard page.
++       */
++      struct entry_stack_page entry_stack_page;
++
++      /*
++       * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
++       * we need task switches to work, and task switches write to the TSS.
++       */
++      struct tss_struct tss;
++
++      char entry_trampoline[PAGE_SIZE];
++
++#ifdef CONFIG_X86_64
++      /*
++       * Exception stacks used for IST entries.
++       *
++       * In the future, this should have a separate slot for each stack
++       * with guard pages between them.
++       */
++      char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
++#endif
++};
++
++#define CPU_ENTRY_AREA_SIZE   (sizeof(struct cpu_entry_area))
++#define CPU_ENTRY_AREA_PAGES  (CPU_ENTRY_AREA_SIZE / PAGE_SIZE)
++
++DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
++
++extern void setup_cpu_entry_areas(void);
++
++#endif
+diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
+index a7fb137ad964..1b2521473480 100644
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -25,6 +25,7 @@
+ #else
+ #include <uapi/asm/vsyscall.h>
+ #endif
++#include <asm/cpu_entry_area.h>
+ 
+ /*
+  * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
+@@ -44,46 +45,6 @@ extern unsigned long __FIXADDR_TOP;
+                        PAGE_SIZE)
+ #endif
+ 
+-/*
+- * cpu_entry_area is a percpu region in the fixmap that contains things
+- * needed by the CPU and early entry/exit code.  Real types aren't used
+- * for all fields here to avoid circular header dependencies.
+- *
+- * Every field is a virtual alias of some other allocated backing store.
+- * There is no direct allocation of a struct cpu_entry_area.
+- */
+-struct cpu_entry_area {
+-      char gdt[PAGE_SIZE];
+-
+-      /*
+-       * The GDT is just below entry_stack and thus serves (on x86_64) as
+-       * a a read-only guard page.
+-       */
+-      struct entry_stack_page entry_stack_page;
+-
+-      /*
+-       * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
+-       * we need task switches to work, and task switches write to the TSS.
+-       */
+-      struct tss_struct tss;
+-
+-      char entry_trampoline[PAGE_SIZE];
+-
+-#ifdef CONFIG_X86_64
+-      /*
+-       * Exception stacks used for IST entries.
+-       *
+-       * In the future, this should have a separate slot for each stack
+-       * with guard pages between them.
+-       */
+-      char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+-#endif
+-};
+-
+-#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
+-
+-extern void setup_cpu_entry_areas(void);
+-
+ /*
+  * Here we define all the compile-time 'special' virtual
+  * addresses. The point is to have a constant address at
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 7a8a5d436566..96171ce46d61 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -482,102 +482,8 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+         [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
+         [DEBUG_STACK - 1]                     = DEBUG_STKSZ
+ };
+-
+-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+-      [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+-#endif
+-
+-static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page,
+-                                 entry_stack_storage);
+-
+-static void __init
+-set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
+-{
+-      for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
+-              __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
+-}
+-
+-/* Setup the fixmap mappings only once per-processor */
+-static void __init setup_cpu_entry_area(int cpu)
+-{
+-#ifdef CONFIG_X86_64
+-      extern char _entry_trampoline[];
+-
+-      /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+-      pgprot_t gdt_prot = PAGE_KERNEL_RO;
+-      pgprot_t tss_prot = PAGE_KERNEL_RO;
+-#else
+-      /*
+-       * On native 32-bit systems, the GDT cannot be read-only because
+-       * our double fault handler uses a task gate, and entering through
+-       * a task gate needs to change an available TSS to busy.  If the
+-       * GDT is read-only, that will triple fault.  The TSS cannot be
+-       * read-only because the CPU writes to it on task switches.
+-       *
+-       * On Xen PV, the GDT must be read-only because the hypervisor
+-       * requires it.
+-       */
+-      pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+-              PAGE_KERNEL_RO : PAGE_KERNEL;
+-      pgprot_t tss_prot = PAGE_KERNEL;
+-#endif
+-
+-      __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+-      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
+-                              per_cpu_ptr(&entry_stack_storage, cpu), 1,
+-                              PAGE_KERNEL);
+-
+-      /*
+-       * The Intel SDM says (Volume 3, 7.2.1):
+-       *
+-       *  Avoid placing a page boundary in the part of the TSS that the
+-       *  processor reads during a task switch (the first 104 bytes). The
+-       *  processor may not correctly perform address translations if a
+-       *  boundary occurs in this area. During a task switch, the processor
+-       *  reads and writes into the first 104 bytes of each TSS (using
+-       *  contiguous physical addresses beginning with the physical address
+-       *  of the first byte of the TSS). So, after TSS access begins, if
+-       *  part of the 104 bytes is not physically contiguous, the processor
+-       *  will access incorrect information without generating a page-fault
+-       *  exception.
+-       *
+-       * There are also a lot of errata involving the TSS spanning a page
+-       * boundary.  Assert that we're not doing that.
+-       */
+-      BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+-                    offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+-      BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+-      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
+-                              &per_cpu(cpu_tss_rw, cpu),
+-                              sizeof(struct tss_struct) / PAGE_SIZE,
+-                              tss_prot);
+-
+-#ifdef CONFIG_X86_32
+-      per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+ #endif
+ 
+-#ifdef CONFIG_X86_64
+-      BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+-      BUILD_BUG_ON(sizeof(exception_stacks) !=
+-                   sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+-      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
+-                              &per_cpu(exception_stacks, cpu),
+-                              sizeof(exception_stacks) / PAGE_SIZE,
+-                              PAGE_KERNEL);
+-
+-      __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
+-                   __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+-#endif
+-}
+-
+-void __init setup_cpu_entry_areas(void)
+-{
+-      unsigned int cpu;
+-
+-      for_each_possible_cpu(cpu)
+-              setup_cpu_entry_area(cpu);
+-}
+-
+ /* Load the original GDT from the per-cpu structure */
+ void load_direct_gdt(int cpu)
+ {
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index 14b462eefa17..ef2d1b8a0516 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -57,6 +57,7 @@
+ #include <asm/traps.h>
+ #include <asm/desc.h>
+ #include <asm/fpu/internal.h>
++#include <asm/cpu_entry_area.h>
+ #include <asm/mce.h>
+ #include <asm/fixmap.h>
+ #include <asm/mach_traps.h>
+diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
+new file mode 100644
+index 000000000000..235ff9cfaaf4
+--- /dev/null
++++ b/arch/x86/mm/cpu_entry_area.c
+@@ -0,0 +1,104 @@
++// SPDX-License-Identifier: GPL-2.0
++
++#include <linux/spinlock.h>
++#include <linux/percpu.h>
++
++#include <asm/cpu_entry_area.h>
++#include <asm/pgtable.h>
++#include <asm/fixmap.h>
++#include <asm/desc.h>
++
++static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
++
++#ifdef CONFIG_X86_64
++static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
++      [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
++#endif
++
++static void __init
++set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
++{
++      for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
++              __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
++}
++
++/* Setup the fixmap mappings only once per-processor */
++static void __init setup_cpu_entry_area(int cpu)
++{
++#ifdef CONFIG_X86_64
++      extern char _entry_trampoline[];
++
++      /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
++      pgprot_t gdt_prot = PAGE_KERNEL_RO;
++      pgprot_t tss_prot = PAGE_KERNEL_RO;
++#else
++      /*
++       * On native 32-bit systems, the GDT cannot be read-only because
++       * our double fault handler uses a task gate, and entering through
++       * a task gate needs to change an available TSS to busy.  If the
++       * GDT is read-only, that will triple fault.  The TSS cannot be
++       * read-only because the CPU writes to it on task switches.
++       *
++       * On Xen PV, the GDT must be read-only because the hypervisor
++       * requires it.
++       */
++      pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
++              PAGE_KERNEL_RO : PAGE_KERNEL;
++      pgprot_t tss_prot = PAGE_KERNEL;
++#endif
++
++      __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
++      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
++                              per_cpu_ptr(&entry_stack_storage, cpu), 1,
++                              PAGE_KERNEL);
++
++      /*
++       * The Intel SDM says (Volume 3, 7.2.1):
++       *
++       *  Avoid placing a page boundary in the part of the TSS that the
++       *  processor reads during a task switch (the first 104 bytes). The
++       *  processor may not correctly perform address translations if a
++       *  boundary occurs in this area. During a task switch, the processor
++       *  reads and writes into the first 104 bytes of each TSS (using
++       *  contiguous physical addresses beginning with the physical address
++       *  of the first byte of the TSS). So, after TSS access begins, if
++       *  part of the 104 bytes is not physically contiguous, the processor
++       *  will access incorrect information without generating a page-fault
++       *  exception.
++       *
++       * There are also a lot of errata involving the TSS spanning a page
++       * boundary.  Assert that we're not doing that.
++       */
++      BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
++                    offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
++      BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
++      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
++                              &per_cpu(cpu_tss_rw, cpu),
++                              sizeof(struct tss_struct) / PAGE_SIZE,
++                              tss_prot);
++
++#ifdef CONFIG_X86_32
++      per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
++#endif
++
++#ifdef CONFIG_X86_64
++      BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
++      BUILD_BUG_ON(sizeof(exception_stacks) !=
++                   sizeof(((struct cpu_entry_area *)0)->exception_stacks));
++      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
++                              &per_cpu(exception_stacks, cpu),
++                              sizeof(exception_stacks) / PAGE_SIZE,
++                              PAGE_KERNEL);
++
++      __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
++                   __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
++#endif
++}
++
++void __init setup_cpu_entry_areas(void)
++{
++      unsigned int cpu;
++
++      for_each_possible_cpu(cpu)
++              setup_cpu_entry_area(cpu);
++}
+-- 
+2.14.2
+
diff --git a/patches/kernel/0187-init-Invoke-init_espfix_bsp-from-mm_init.patch b/patches/kernel/0187-init-Invoke-init_espfix_bsp-from-mm_init.patch

deleted file mode 100644 (file)

index 78868d6..0000000
--- a/patches/kernel/0187-init-Invoke-init_espfix_bsp-from-mm_init.patch
+++ /dev/null
@@ -1,123 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Sun, 17 Dec 2017 10:56:29 +0100
-Subject: [PATCH] init: Invoke init_espfix_bsp() from mm_init()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-init_espfix_bsp() needs to be invoked before the page table isolation
-initialization. Move it into mm_init() which is the place where pti_init()
-will be added.
-
-While at it get rid of the #ifdeffery and provide proper stub functions.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 613e396bc0d4c7604fba23256644e78454c68cf6)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit a187e1a3cd87c860a8db188991d2d43fedd7225f)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/espfix.h | 7 ++++---
- include/asm-generic/pgtable.h | 5 +++++
- arch/x86/kernel/smpboot.c     | 6 +-----
- init/main.c                   | 6 ++----
- 4 files changed, 12 insertions(+), 12 deletions(-)
-
-diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
-index ca3ce9ab9385..e7009ac975af 100644
---- a/arch/x86/include/asm/espfix.h
-+++ b/arch/x86/include/asm/espfix.h
-@@ -1,7 +1,7 @@
- #ifndef _ASM_X86_ESPFIX_H
- #define _ASM_X86_ESPFIX_H
- 
--#ifdef CONFIG_X86_64
-+#ifdef CONFIG_X86_ESPFIX64
- 
- #include <asm/percpu.h>
- 
-@@ -10,7 +10,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
- 
- extern void init_espfix_bsp(void);
- extern void init_espfix_ap(int cpu);
--
--#endif /* CONFIG_X86_64 */
-+#else
-+static inline void init_espfix_ap(int cpu) { }
-+#endif
- 
- #endif /* _ASM_X86_ESPFIX_H */
-diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
-index 7dfa767dc680..1bab3cfc0601 100644
---- a/include/asm-generic/pgtable.h
-+++ b/include/asm-generic/pgtable.h
-@@ -956,6 +956,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
- struct file;
- int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
-                       unsigned long size, pgprot_t *vma_prot);
-+
-+#ifndef CONFIG_X86_ESPFIX64
-+static inline void init_espfix_bsp(void) { }
-+#endif
-+
- #endif /* !__ASSEMBLY__ */
- 
- #ifndef io_remap_pfn_range
-diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
-index 8ea3b18cbdc1..03d2ba2da3b0 100644
---- a/arch/x86/kernel/smpboot.c
-+++ b/arch/x86/kernel/smpboot.c
-@@ -989,12 +989,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
-       initial_code = (unsigned long)start_secondary;
-       initial_stack  = idle->thread.sp;
- 
--      /*
--       * Enable the espfix hack for this CPU
--      */
--#ifdef CONFIG_X86_ESPFIX64
-+      /* Enable the espfix hack for this CPU */
-       init_espfix_ap(cpu);
--#endif
- 
-       /* So we see what's up */
-       announce_cpu(cpu, apicid);
-diff --git a/init/main.c b/init/main.c
-index 83d1004e3b97..de1c495da782 100644
---- a/init/main.c
-+++ b/init/main.c
-@@ -504,6 +504,8 @@ static void __init mm_init(void)
-       pgtable_init();
-       vmalloc_init();
-       ioremap_huge_init();
-+      /* Should be run before the first non-init thread is created */
-+      init_espfix_bsp();
- }
- 
- asmlinkage __visible void __init start_kernel(void)
-@@ -664,10 +666,6 @@ asmlinkage __visible void __init start_kernel(void)
- #ifdef CONFIG_X86
-       if (efi_enabled(EFI_RUNTIME_SERVICES))
-               efi_enter_virtual_mode();
--#endif
--#ifdef CONFIG_X86_ESPFIX64
--      /* Should be run before the first non-init thread is created */
--      init_espfix_bsp();
- #endif
-       thread_stack_cache_init();
-       cred_init();
--- 
-2.14.2
-
diff --git a/patches/kernel/0187-x86-cpu_entry_area-Move-it-out-of-the-fixmap.patch b/patches/kernel/0187-x86-cpu_entry_area-Move-it-out-of-the-fixmap.patch

new file mode 100644 (file)

index 0000000..726fe13
--- /dev/null
+++ b/patches/kernel/0187-x86-cpu_entry_area-Move-it-out-of-the-fixmap.patch
@@ -0,0 +1,588 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 20 Dec 2017 18:51:31 +0100
+Subject: [PATCH] x86/cpu_entry_area: Move it out of the fixmap
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Put the cpu_entry_area into a separate P4D entry. The fixmap gets too big
+and 0-day already hit a case where the fixmap PTEs were cleared by
+cleanup_highmap().
+
+Aside of that the fixmap API is a pain as it's all backwards.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit 92a0f81d89571e3e8759366e050ee05cc545ef99)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit bda9eb328d9ce3757f22794f79da73dd5886c93a)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/x86/x86_64/mm.txt         |  2 +
+ arch/x86/include/asm/cpu_entry_area.h   | 18 ++++++++-
+ arch/x86/include/asm/desc.h             |  2 +
+ arch/x86/include/asm/fixmap.h           | 32 +---------------
+ arch/x86/include/asm/pgtable_32_types.h | 15 ++++++--
+ arch/x86/include/asm/pgtable_64_types.h | 47 +++++++++++++----------
+ arch/x86/kernel/dumpstack.c             |  1 +
+ arch/x86/kernel/traps.c                 |  5 ++-
+ arch/x86/mm/cpu_entry_area.c            | 66 +++++++++++++++++++++++++--------
+ arch/x86/mm/dump_pagetables.c           |  6 ++-
+ arch/x86/mm/init_32.c                   |  6 +++
+ arch/x86/mm/kasan_init_64.c             | 30 ++++++++-------
+ arch/x86/mm/pgtable_32.c                |  1 +
+ arch/x86/xen/mmu_pv.c                   |  2 -
+ 14 files changed, 145 insertions(+), 88 deletions(-)
+
+diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
+index 63a41671d25b..51101708a03a 100644
+--- a/Documentation/x86/x86_64/mm.txt
++++ b/Documentation/x86/x86_64/mm.txt
+@@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
+ ... unused hole ...
+ ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
+ ... unused hole ...
++fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+ ... unused hole ...
+ ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+@@ -35,6 +36,7 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
+ ... unused hole ...
+ ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
+ ... unused hole ...
++fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+ ... unused hole ...
+ ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
+index 5471826803af..2fbc69a0916e 100644
+--- a/arch/x86/include/asm/cpu_entry_area.h
++++ b/arch/x86/include/asm/cpu_entry_area.h
+@@ -43,10 +43,26 @@ struct cpu_entry_area {
+ };
+ 
+ #define CPU_ENTRY_AREA_SIZE   (sizeof(struct cpu_entry_area))
+-#define CPU_ENTRY_AREA_PAGES  (CPU_ENTRY_AREA_SIZE / PAGE_SIZE)
++#define CPU_ENTRY_AREA_TOT_SIZE       (CPU_ENTRY_AREA_SIZE * NR_CPUS)
+ 
+ DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+ 
+ extern void setup_cpu_entry_areas(void);
++extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
++
++#define       CPU_ENTRY_AREA_RO_IDT           CPU_ENTRY_AREA_BASE
++#define CPU_ENTRY_AREA_PER_CPU                (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
++
++#define CPU_ENTRY_AREA_RO_IDT_VADDR   ((void *)CPU_ENTRY_AREA_RO_IDT)
++
++#define CPU_ENTRY_AREA_MAP_SIZE                       \
++      (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
++
++extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
++
++static inline struct entry_stack *cpu_entry_stack(int cpu)
++{
++      return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
++}
+ 
+ #endif
+diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
+index b817fe247506..de40c514ba25 100644
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -5,6 +5,8 @@
+ #include <asm/ldt.h>
+ #include <asm/mmu.h>
+ #include <asm/fixmap.h>
++#include <asm/pgtable.h>
++#include <asm/cpu_entry_area.h>
+ 
+ #include <linux/smp.h>
+ #include <linux/percpu.h>
+diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
+index 1b2521473480..a6ff9e1a6189 100644
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -25,7 +25,6 @@
+ #else
+ #include <uapi/asm/vsyscall.h>
+ #endif
+-#include <asm/cpu_entry_area.h>
+ 
+ /*
+  * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
+@@ -84,7 +83,6 @@ enum fixed_addresses {
+       FIX_IO_APIC_BASE_0,
+       FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
+ #endif
+-      FIX_RO_IDT,     /* Virtual mapping for read-only IDT */
+ #ifdef CONFIG_X86_32
+       FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
+       FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+@@ -100,9 +98,6 @@ enum fixed_addresses {
+ #ifdef        CONFIG_X86_INTEL_MID
+       FIX_LNW_VRTC,
+ #endif
+-      /* Fixmap entries to remap the GDTs, one per processor. */
+-      FIX_CPU_ENTRY_AREA_TOP,
+-      FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
+ 
+ #ifdef CONFIG_ACPI_APEI_GHES
+       /* Used for GHES mapping from assorted contexts */
+@@ -143,7 +138,7 @@ enum fixed_addresses {
+ extern void reserve_top_address(unsigned long reserve);
+ 
+ #define FIXADDR_SIZE  (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+-#define FIXADDR_START         (FIXADDR_TOP - FIXADDR_SIZE)
++#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+ 
+ extern int fixmaps_set;
+ 
+@@ -171,30 +166,5 @@ static inline void __set_fixmap(enum fixed_addresses idx,
+ void __early_set_fixmap(enum fixed_addresses idx,
+                       phys_addr_t phys, pgprot_t flags);
+ 
+-static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
+-{
+-      BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
+-
+-      return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
+-}
+-
+-#define __get_cpu_entry_area_offset_index(cpu, offset) ({             \
+-      BUILD_BUG_ON(offset % PAGE_SIZE != 0);                          \
+-      __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE);       \
+-      })
+-
+-#define get_cpu_entry_area_index(cpu, field)                          \
+-      __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
+-
+-static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
+-{
+-      return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
+-}
+-
+-static inline struct entry_stack *cpu_entry_stack(int cpu)
+-{
+-      return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+-}
+-
+ #endif /* !__ASSEMBLY__ */
+ #endif /* _ASM_X86_FIXMAP_H */
+diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
+index 9fb2f2bc8245..67b60e11b70d 100644
+--- a/arch/x86/include/asm/pgtable_32_types.h
++++ b/arch/x86/include/asm/pgtable_32_types.h
+@@ -37,13 +37,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
+ #define LAST_PKMAP 1024
+ #endif
+ 
+-#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1))    \
+-                  & PMD_MASK)
++/*
++ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
++ * to avoid include recursion hell
++ */
++#define CPU_ENTRY_AREA_PAGES  (NR_CPUS * 40)
++
++#define CPU_ENTRY_AREA_BASE                           \
++      ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
++
++#define PKMAP_BASE            \
++      ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
+ 
+ #ifdef CONFIG_HIGHMEM
+ # define VMALLOC_END  (PKMAP_BASE - 2 * PAGE_SIZE)
+ #else
+-# define VMALLOC_END  (FIXADDR_START - 2 * PAGE_SIZE)
++# define VMALLOC_END  (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
+ #endif
+ 
+ #define MODULES_VADDR VMALLOC_START
+diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
+index 06470da156ba..42e2750da525 100644
+--- a/arch/x86/include/asm/pgtable_64_types.h
++++ b/arch/x86/include/asm/pgtable_64_types.h
+@@ -75,32 +75,41 @@ typedef struct { pteval_t pte; } pte_t;
+ #define PGDIR_MASK    (~(PGDIR_SIZE - 1))
+ 
+ /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
+-#define MAXMEM                _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
++#define MAXMEM                        _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
++
+ #ifdef CONFIG_X86_5LEVEL
+-#define VMALLOC_SIZE_TB _AC(16384, UL)
+-#define __VMALLOC_BASE        _AC(0xff92000000000000, UL)
+-#define __VMEMMAP_BASE        _AC(0xffd4000000000000, UL)
++# define VMALLOC_SIZE_TB      _AC(16384, UL)
++# define __VMALLOC_BASE               _AC(0xff92000000000000, UL)
++# define __VMEMMAP_BASE               _AC(0xffd4000000000000, UL)
+ #else
+-#define VMALLOC_SIZE_TB       _AC(32, UL)
+-#define __VMALLOC_BASE        _AC(0xffffc90000000000, UL)
+-#define __VMEMMAP_BASE        _AC(0xffffea0000000000, UL)
++# define VMALLOC_SIZE_TB      _AC(32, UL)
++# define __VMALLOC_BASE               _AC(0xffffc90000000000, UL)
++# define __VMEMMAP_BASE               _AC(0xffffea0000000000, UL)
+ #endif
++
+ #ifdef CONFIG_RANDOMIZE_MEMORY
+-#define VMALLOC_START vmalloc_base
+-#define VMEMMAP_START vmemmap_base
++# define VMALLOC_START                vmalloc_base
++# define VMEMMAP_START                vmemmap_base
+ #else
+-#define VMALLOC_START __VMALLOC_BASE
+-#define VMEMMAP_START __VMEMMAP_BASE
++# define VMALLOC_START                __VMALLOC_BASE
++# define VMEMMAP_START                __VMEMMAP_BASE
+ #endif /* CONFIG_RANDOMIZE_MEMORY */
+-#define VMALLOC_END   (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
+-#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
++
++#define VMALLOC_END           (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
++
++#define MODULES_VADDR         (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+ /* The module sections ends with the start of the fixmap */
+-#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
+-#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
+-#define ESPFIX_PGD_ENTRY _AC(-2, UL)
+-#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
+-#define EFI_VA_START   ( -4 * (_AC(1, UL) << 30))
+-#define EFI_VA_END     (-68 * (_AC(1, UL) << 30))
++#define MODULES_END           __fix_to_virt(__end_of_fixed_addresses + 1)
++#define MODULES_LEN           (MODULES_END - MODULES_VADDR)
++
++#define ESPFIX_PGD_ENTRY      _AC(-2, UL)
++#define ESPFIX_BASE_ADDR      (ESPFIX_PGD_ENTRY << P4D_SHIFT)
++
++#define CPU_ENTRY_AREA_PGD    _AC(-3, UL)
++#define CPU_ENTRY_AREA_BASE   (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
++
++#define EFI_VA_START          ( -4 * (_AC(1, UL) << 30))
++#define EFI_VA_END            (-68 * (_AC(1, UL) << 30))
+ 
+ #define EARLY_DYNAMIC_PAGE_TABLES     64
+ 
+diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
+index 55bf1c3b5319..2bdeb983b9d8 100644
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -18,6 +18,7 @@
+ #include <linux/nmi.h>
+ #include <linux/sysfs.h>
+ 
++#include <asm/cpu_entry_area.h>
+ #include <asm/stacktrace.h>
+ #include <asm/unwind.h>
+ 
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index ef2d1b8a0516..5808ccb59266 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -1041,8 +1041,9 @@ void __init trap_init(void)
+        * "sidt" instruction will not leak the location of the kernel, and
+        * to defend the IDT against arbitrary memory write vulnerabilities.
+        * It will be reloaded in cpu_init() */
+-      __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
+-      idt_descr.address = fix_to_virt(FIX_RO_IDT);
++      cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
++                  PAGE_KERNEL_RO);
++      idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
+ 
+       /*
+        * Should be a barrier for any external CPU state:
+diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
+index 235ff9cfaaf4..21e8b595cbb1 100644
+--- a/arch/x86/mm/cpu_entry_area.c
++++ b/arch/x86/mm/cpu_entry_area.c
+@@ -15,11 +15,27 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+ #endif
+ 
++struct cpu_entry_area *get_cpu_entry_area(int cpu)
++{
++      unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
++      BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
++
++      return (struct cpu_entry_area *) va;
++}
++EXPORT_SYMBOL(get_cpu_entry_area);
++
++void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
++{
++      unsigned long va = (unsigned long) cea_vaddr;
++
++      set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
++}
++
+ static void __init
+-set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
++cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
+ {
+-      for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
+-              __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
++      for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
++              cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
+ }
+ 
+ /* Setup the fixmap mappings only once per-processor */
+@@ -47,10 +63,12 @@ static void __init setup_cpu_entry_area(int cpu)
+       pgprot_t tss_prot = PAGE_KERNEL;
+ #endif
+ 
+-      __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+-      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
+-                              per_cpu_ptr(&entry_stack_storage, cpu), 1,
+-                              PAGE_KERNEL);
++      cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
++                  gdt_prot);
++
++      cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
++                           per_cpu_ptr(&entry_stack_storage, cpu), 1,
++                           PAGE_KERNEL);
+ 
+       /*
+        * The Intel SDM says (Volume 3, 7.2.1):
+@@ -72,10 +90,9 @@ static void __init setup_cpu_entry_area(int cpu)
+       BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+                     offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+       BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+-      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
+-                              &per_cpu(cpu_tss_rw, cpu),
+-                              sizeof(struct tss_struct) / PAGE_SIZE,
+-                              tss_prot);
++      cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
++                           &per_cpu(cpu_tss_rw, cpu),
++                           sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
+ 
+ #ifdef CONFIG_X86_32
+       per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+@@ -85,20 +102,37 @@ static void __init setup_cpu_entry_area(int cpu)
+       BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+       BUILD_BUG_ON(sizeof(exception_stacks) !=
+                    sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+-      set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
+-                              &per_cpu(exception_stacks, cpu),
+-                              sizeof(exception_stacks) / PAGE_SIZE,
+-                              PAGE_KERNEL);
++      cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
++                           &per_cpu(exception_stacks, cpu),
++                           sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
+ 
+-      __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
++      cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
+                    __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+ #endif
+ }
+ 
++static __init void setup_cpu_entry_area_ptes(void)
++{
++#ifdef CONFIG_X86_32
++      unsigned long start, end;
++
++      BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
++      BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
++
++      start = CPU_ENTRY_AREA_BASE;
++      end = start + CPU_ENTRY_AREA_MAP_SIZE;
++
++      for (; start < end; start += PMD_SIZE)
++              populate_extra_pte(start);
++#endif
++}
++
+ void __init setup_cpu_entry_areas(void)
+ {
+       unsigned int cpu;
+ 
++      setup_cpu_entry_area_ptes();
++
+       for_each_possible_cpu(cpu)
+               setup_cpu_entry_area(cpu);
+ }
+diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
+index 318a7c30e87e..3b7720404a9f 100644
+--- a/arch/x86/mm/dump_pagetables.c
++++ b/arch/x86/mm/dump_pagetables.c
+@@ -58,6 +58,7 @@ enum address_markers_idx {
+       KASAN_SHADOW_START_NR,
+       KASAN_SHADOW_END_NR,
+ #endif
++      CPU_ENTRY_AREA_NR,
+ #ifdef CONFIG_X86_ESPFIX64
+       ESPFIX_START_NR,
+ #endif
+@@ -81,6 +82,7 @@ static struct addr_marker address_markers[] = {
+       [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
+       [KASAN_SHADOW_END_NR]   = { KASAN_SHADOW_END,   "KASAN shadow end" },
+ #endif
++      [CPU_ENTRY_AREA_NR]     = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
+ #ifdef CONFIG_X86_ESPFIX64
+       [ESPFIX_START_NR]       = { ESPFIX_BASE_ADDR,   "ESPfix Area", 16 },
+ #endif
+@@ -104,6 +106,7 @@ enum address_markers_idx {
+ #ifdef CONFIG_HIGHMEM
+       PKMAP_BASE_NR,
+ #endif
++      CPU_ENTRY_AREA_NR,
+       FIXADDR_START_NR,
+       END_OF_SPACE_NR,
+ };
+@@ -116,6 +119,7 @@ static struct addr_marker address_markers[] = {
+ #ifdef CONFIG_HIGHMEM
+       [PKMAP_BASE_NR]         = { 0UL,                "Persistent kmap() Area" },
+ #endif
++      [CPU_ENTRY_AREA_NR]     = { 0UL,                "CPU entry area" },
+       [FIXADDR_START_NR]      = { 0UL,                "Fixmap area" },
+       [END_OF_SPACE_NR]       = { -1,                 NULL }
+ };
+@@ -522,8 +526,8 @@ static int __init pt_dump_init(void)
+       address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
+ # endif
+       address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
++      address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
+ #endif
+-
+       return 0;
+ }
+ __initcall(pt_dump_init);
+diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
+index 8a64a6f2848d..135c9a7898c7 100644
+--- a/arch/x86/mm/init_32.c
++++ b/arch/x86/mm/init_32.c
+@@ -50,6 +50,7 @@
+ #include <asm/setup.h>
+ #include <asm/set_memory.h>
+ #include <asm/page_types.h>
++#include <asm/cpu_entry_area.h>
+ #include <asm/init.h>
+ 
+ #include "mm_internal.h"
+@@ -766,6 +767,7 @@ void __init mem_init(void)
+       mem_init_print_info(NULL);
+       printk(KERN_INFO "virtual kernel memory layout:\n"
+               "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
++              "  cpu_entry : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+ #ifdef CONFIG_HIGHMEM
+               "    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+ #endif
+@@ -777,6 +779,10 @@ void __init mem_init(void)
+               FIXADDR_START, FIXADDR_TOP,
+               (FIXADDR_TOP - FIXADDR_START) >> 10,
+ 
++              CPU_ENTRY_AREA_BASE,
++              CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
++              CPU_ENTRY_AREA_MAP_SIZE >> 10,
++
+ #ifdef CONFIG_HIGHMEM
+               PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
+               (LAST_PKMAP*PAGE_SIZE) >> 10,
+diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
+index d8836e45bc07..4cd556a30ee1 100644
+--- a/arch/x86/mm/kasan_init_64.c
++++ b/arch/x86/mm/kasan_init_64.c
+@@ -13,6 +13,8 @@
+ #include <asm/pgalloc.h>
+ #include <asm/tlbflush.h>
+ #include <asm/sections.h>
++#include <asm/pgtable.h>
++#include <asm/cpu_entry_area.h>
+ 
+ extern pgd_t early_top_pgt[PTRS_PER_PGD];
+ extern struct range pfn_mapped[E820_MAX_ENTRIES];
+@@ -321,31 +323,33 @@ void __init kasan_init(void)
+               map_range(&pfn_mapped[i]);
+       }
+ 
+-      kasan_populate_zero_shadow(
+-              kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
+-              kasan_mem_to_shadow((void *)__START_KERNEL_map));
+-
+-      kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
+-                            (unsigned long)kasan_mem_to_shadow(_end),
+-                            early_pfn_to_nid(__pa(_stext)));
+-
+-      shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
++      shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
+       shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
+       shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
+                                               PAGE_SIZE);
+ 
+-      shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
++      shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
++                                      CPU_ENTRY_AREA_MAP_SIZE);
+       shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
+       shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
+                                       PAGE_SIZE);
+ 
+-      kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
+-                                 shadow_cpu_entry_begin);
++      kasan_populate_zero_shadow(
++              kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
++              shadow_cpu_entry_begin);
+ 
+       kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
+                             (unsigned long)shadow_cpu_entry_end, 0);
+ 
+-      kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END);
++      kasan_populate_zero_shadow(shadow_cpu_entry_end,
++                              kasan_mem_to_shadow((void *)__START_KERNEL_map));
++
++      kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
++                            (unsigned long)kasan_mem_to_shadow(_end),
++                            early_pfn_to_nid(__pa(_stext)));
++
++      kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
++                              (void *)KASAN_SHADOW_END);
+ 
+       load_cr3(init_top_pgt);
+       __flush_tlb_all();
+diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
+index b9bd5b8b14fa..77909bae5943 100644
+--- a/arch/x86/mm/pgtable_32.c
++++ b/arch/x86/mm/pgtable_32.c
+@@ -9,6 +9,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/spinlock.h>
+ 
++#include <asm/cpu_entry_area.h>
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
+ #include <asm/fixmap.h>
+diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
+index 53e65f605bdd..cd4b91b8d614 100644
+--- a/arch/x86/xen/mmu_pv.c
++++ b/arch/x86/xen/mmu_pv.c
+@@ -2286,7 +2286,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
+ 
+       switch (idx) {
+       case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
+-      case FIX_RO_IDT:
+ #ifdef CONFIG_X86_32
+       case FIX_WP_TEST:
+ # ifdef CONFIG_HIGHMEM
+@@ -2297,7 +2296,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
+ #endif
+       case FIX_TEXT_POKE0:
+       case FIX_TEXT_POKE1:
+-      case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
+               /* All local page mappings */
+               pte = pfn_pte(phys, prot);
+               break;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0188-init-Invoke-init_espfix_bsp-from-mm_init.patch b/patches/kernel/0188-init-Invoke-init_espfix_bsp-from-mm_init.patch

new file mode 100644 (file)

index 0000000..78868d6
--- /dev/null
+++ b/patches/kernel/0188-init-Invoke-init_espfix_bsp-from-mm_init.patch
@@ -0,0 +1,123 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Dec 2017 10:56:29 +0100
+Subject: [PATCH] init: Invoke init_espfix_bsp() from mm_init()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+init_espfix_bsp() needs to be invoked before the page table isolation
+initialization. Move it into mm_init() which is the place where pti_init()
+will be added.
+
+While at it get rid of the #ifdeffery and provide proper stub functions.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 613e396bc0d4c7604fba23256644e78454c68cf6)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit a187e1a3cd87c860a8db188991d2d43fedd7225f)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/espfix.h | 7 ++++---
+ include/asm-generic/pgtable.h | 5 +++++
+ arch/x86/kernel/smpboot.c     | 6 +-----
+ init/main.c                   | 6 ++----
+ 4 files changed, 12 insertions(+), 12 deletions(-)
+
+diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
+index ca3ce9ab9385..e7009ac975af 100644
+--- a/arch/x86/include/asm/espfix.h
++++ b/arch/x86/include/asm/espfix.h
+@@ -1,7 +1,7 @@
+ #ifndef _ASM_X86_ESPFIX_H
+ #define _ASM_X86_ESPFIX_H
+ 
+-#ifdef CONFIG_X86_64
++#ifdef CONFIG_X86_ESPFIX64
+ 
+ #include <asm/percpu.h>
+ 
+@@ -10,7 +10,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
+ 
+ extern void init_espfix_bsp(void);
+ extern void init_espfix_ap(int cpu);
+-
+-#endif /* CONFIG_X86_64 */
++#else
++static inline void init_espfix_ap(int cpu) { }
++#endif
+ 
+ #endif /* _ASM_X86_ESPFIX_H */
+diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
+index 7dfa767dc680..1bab3cfc0601 100644
+--- a/include/asm-generic/pgtable.h
++++ b/include/asm-generic/pgtable.h
+@@ -956,6 +956,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
+ struct file;
+ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
+                       unsigned long size, pgprot_t *vma_prot);
++
++#ifndef CONFIG_X86_ESPFIX64
++static inline void init_espfix_bsp(void) { }
++#endif
++
+ #endif /* !__ASSEMBLY__ */
+ 
+ #ifndef io_remap_pfn_range
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index 8ea3b18cbdc1..03d2ba2da3b0 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -989,12 +989,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
+       initial_code = (unsigned long)start_secondary;
+       initial_stack  = idle->thread.sp;
+ 
+-      /*
+-       * Enable the espfix hack for this CPU
+-      */
+-#ifdef CONFIG_X86_ESPFIX64
++      /* Enable the espfix hack for this CPU */
+       init_espfix_ap(cpu);
+-#endif
+ 
+       /* So we see what's up */
+       announce_cpu(cpu, apicid);
+diff --git a/init/main.c b/init/main.c
+index 83d1004e3b97..de1c495da782 100644
+--- a/init/main.c
++++ b/init/main.c
+@@ -504,6 +504,8 @@ static void __init mm_init(void)
+       pgtable_init();
+       vmalloc_init();
+       ioremap_huge_init();
++      /* Should be run before the first non-init thread is created */
++      init_espfix_bsp();
+ }
+ 
+ asmlinkage __visible void __init start_kernel(void)
+@@ -664,10 +666,6 @@ asmlinkage __visible void __init start_kernel(void)
+ #ifdef CONFIG_X86
+       if (efi_enabled(EFI_RUNTIME_SERVICES))
+               efi_enter_virtual_mode();
+-#endif
+-#ifdef CONFIG_X86_ESPFIX64
+-      /* Should be run before the first non-init thread is created */
+-      init_espfix_bsp();
+ #endif
+       thread_stack_cache_init();
+       cred_init();
+-- 
+2.14.2
+
diff --git a/patches/kernel/0188-x86-cpu_entry_area-Prevent-wraparound-in-setup_cpu_e.patch b/patches/kernel/0188-x86-cpu_entry_area-Prevent-wraparound-in-setup_cpu_e.patch

deleted file mode 100644 (file)

index 4f6e414..0000000
--- a/patches/kernel/0188-x86-cpu_entry_area-Prevent-wraparound-in-setup_cpu_e.patch
+++ /dev/null
@@ -1,49 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Sat, 23 Dec 2017 19:45:11 +0100
-Subject: [PATCH] x86/cpu_entry_area: Prevent wraparound in
- setup_cpu_entry_area_ptes() on 32bit
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The loop which populates the CPU entry area PMDs can wrap around on 32bit
-machines when the number of CPUs is small.
-
-It worked wonderful for NR_CPUS=64 for whatever reason and the moron who
-wrote that code did not bother to test it with !SMP.
-
-Check for the wraparound to fix it.
-
-Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap")
-Reported-by: kernel test robot <fengguang.wu@intel.com>
-Signed-off-by: Thomas "Feels stupid" Gleixner <tglx@linutronix.de>
-Tested-by: Borislav Petkov <bp@alien8.de>
-(cherry picked from commit f6c4fd506cb626e4346aa81688f255e593a7c5a0)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 8a21158932b93ed7e72d16683085d55a3a06125e)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/cpu_entry_area.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
-index 21e8b595cbb1..fe814fd5e014 100644
---- a/arch/x86/mm/cpu_entry_area.c
-+++ b/arch/x86/mm/cpu_entry_area.c
-@@ -122,7 +122,8 @@ static __init void setup_cpu_entry_area_ptes(void)
-       start = CPU_ENTRY_AREA_BASE;
-       end = start + CPU_ENTRY_AREA_MAP_SIZE;
- 
--      for (; start < end; start += PMD_SIZE)
-+      /* Careful here: start + PMD_SIZE might wrap around */
-+      for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
-               populate_extra_pte(start);
- #endif
- }
--- 
-2.14.2
-
diff --git a/patches/kernel/0189-x86-cpu_entry_area-Prevent-wraparound-in-setup_cpu_e.patch b/patches/kernel/0189-x86-cpu_entry_area-Prevent-wraparound-in-setup_cpu_e.patch

new file mode 100644 (file)

index 0000000..4f6e414
--- /dev/null
+++ b/patches/kernel/0189-x86-cpu_entry_area-Prevent-wraparound-in-setup_cpu_e.patch
@@ -0,0 +1,49 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 23 Dec 2017 19:45:11 +0100
+Subject: [PATCH] x86/cpu_entry_area: Prevent wraparound in
+ setup_cpu_entry_area_ptes() on 32bit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The loop which populates the CPU entry area PMDs can wrap around on 32bit
+machines when the number of CPUs is small.
+
+It worked wonderful for NR_CPUS=64 for whatever reason and the moron who
+wrote that code did not bother to test it with !SMP.
+
+Check for the wraparound to fix it.
+
+Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap")
+Reported-by: kernel test robot <fengguang.wu@intel.com>
+Signed-off-by: Thomas "Feels stupid" Gleixner <tglx@linutronix.de>
+Tested-by: Borislav Petkov <bp@alien8.de>
+(cherry picked from commit f6c4fd506cb626e4346aa81688f255e593a7c5a0)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 8a21158932b93ed7e72d16683085d55a3a06125e)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/cpu_entry_area.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
+index 21e8b595cbb1..fe814fd5e014 100644
+--- a/arch/x86/mm/cpu_entry_area.c
++++ b/arch/x86/mm/cpu_entry_area.c
+@@ -122,7 +122,8 @@ static __init void setup_cpu_entry_area_ptes(void)
+       start = CPU_ENTRY_AREA_BASE;
+       end = start + CPU_ENTRY_AREA_MAP_SIZE;
+ 
+-      for (; start < end; start += PMD_SIZE)
++      /* Careful here: start + PMD_SIZE might wrap around */
++      for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
+               populate_extra_pte(start);
+ #endif
+ }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0189-x86-cpufeatures-Add-X86_BUG_CPU_INSECURE.patch b/patches/kernel/0189-x86-cpufeatures-Add-X86_BUG_CPU_INSECURE.patch

deleted file mode 100644 (file)

index e0fbf55..0000000
--- a/patches/kernel/0189-x86-cpufeatures-Add-X86_BUG_CPU_INSECURE.patch
+++ /dev/null
@@ -1,120 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon, 4 Dec 2017 15:07:33 +0100
-Subject: [PATCH] x86/cpufeatures: Add X86_BUG_CPU_INSECURE
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Many x86 CPUs leak information to user space due to missing isolation of
-user space and kernel space page tables. There are many well documented
-ways to exploit that.
-
-The upcoming software migitation of isolating the user and kernel space
-page tables needs a misfeature flag so code can be made runtime
-conditional.
-
-Add the BUG bits which indicates that the CPU is affected and add a feature
-bit which indicates that the software migitation is enabled.
-
-Assume for now that _ALL_ x86 CPUs are affected by this. Exceptions can be
-made later.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit a89f040fa34ec9cd682aed98b8f04e3c47d998bd)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 3b0dffb3557f6a1084a2b92ac0cc2d36b5e1f39f)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/cpufeatures.h       | 3 ++-
- arch/x86/include/asm/disabled-features.h | 8 +++++++-
- arch/x86/kernel/cpu/common.c             | 4 ++++
- 3 files changed, 13 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
-index d57a174ec97c..de4e91452de4 100644
---- a/arch/x86/include/asm/cpufeatures.h
-+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -200,7 +200,7 @@
- #define X86_FEATURE_HW_PSTATE         ( 7*32+ 8) /* AMD HW-PState */
- #define X86_FEATURE_PROC_FEEDBACK     ( 7*32+ 9) /* AMD ProcFeedbackInterface */
- #define X86_FEATURE_SME                       ( 7*32+10) /* AMD Secure Memory Encryption */
--
-+#define X86_FEATURE_PTI                       ( 7*32+11) /* Kernel Page Table Isolation enabled */
- #define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
- #define X86_FEATURE_INTEL_PT          ( 7*32+15) /* Intel Processor Trace */
- #define X86_FEATURE_AVX512_4VNNIW     ( 7*32+16) /* AVX-512 Neural Network Instructions */
-@@ -339,5 +339,6 @@
- #define X86_BUG_SWAPGS_FENCE          X86_BUG(11) /* SWAPGS without input dep on GS */
- #define X86_BUG_MONITOR                       X86_BUG(12) /* IPI required to wake up remote CPU */
- #define X86_BUG_AMD_E400              X86_BUG(13) /* CPU is among the affected by Erratum 400 */
-+#define X86_BUG_CPU_INSECURE          X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
- 
- #endif /* _ASM_X86_CPUFEATURES_H */
-diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
-index 5dff775af7cd..db681152f024 100644
---- a/arch/x86/include/asm/disabled-features.h
-+++ b/arch/x86/include/asm/disabled-features.h
-@@ -42,6 +42,12 @@
- # define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
- #endif
- 
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+# define DISABLE_PTI          0
-+#else
-+# define DISABLE_PTI          (1 << (X86_FEATURE_PTI & 31))
-+#endif
-+
- /*
-  * Make sure to add features to the correct mask
-  */
-@@ -52,7 +58,7 @@
- #define DISABLED_MASK4        0
- #define DISABLED_MASK5        0
- #define DISABLED_MASK6        0
--#define DISABLED_MASK7        0
-+#define DISABLED_MASK7        (DISABLE_PTI)
- #define DISABLED_MASK8        0
- #define DISABLED_MASK9        (DISABLE_MPX)
- #define DISABLED_MASK10       0
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 96171ce46d61..623ba3635793 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -898,6 +898,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
-       }
- 
-       setup_force_cpu_cap(X86_FEATURE_ALWAYS);
-+
-+      /* Assume for now that ALL x86 CPUs are insecure */
-+      setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
-+
-       fpu__init_system(c);
- }
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0190-x86-cpufeatures-Add-X86_BUG_CPU_INSECURE.patch b/patches/kernel/0190-x86-cpufeatures-Add-X86_BUG_CPU_INSECURE.patch

new file mode 100644 (file)

index 0000000..e0fbf55
--- /dev/null
+++ b/patches/kernel/0190-x86-cpufeatures-Add-X86_BUG_CPU_INSECURE.patch
@@ -0,0 +1,120 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:33 +0100
+Subject: [PATCH] x86/cpufeatures: Add X86_BUG_CPU_INSECURE
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Many x86 CPUs leak information to user space due to missing isolation of
+user space and kernel space page tables. There are many well documented
+ways to exploit that.
+
+The upcoming software migitation of isolating the user and kernel space
+page tables needs a misfeature flag so code can be made runtime
+conditional.
+
+Add the BUG bits which indicates that the CPU is affected and add a feature
+bit which indicates that the software migitation is enabled.
+
+Assume for now that _ALL_ x86 CPUs are affected by this. Exceptions can be
+made later.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit a89f040fa34ec9cd682aed98b8f04e3c47d998bd)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 3b0dffb3557f6a1084a2b92ac0cc2d36b5e1f39f)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/cpufeatures.h       | 3 ++-
+ arch/x86/include/asm/disabled-features.h | 8 +++++++-
+ arch/x86/kernel/cpu/common.c             | 4 ++++
+ 3 files changed, 13 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index d57a174ec97c..de4e91452de4 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -200,7 +200,7 @@
+ #define X86_FEATURE_HW_PSTATE         ( 7*32+ 8) /* AMD HW-PState */
+ #define X86_FEATURE_PROC_FEEDBACK     ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+ #define X86_FEATURE_SME                       ( 7*32+10) /* AMD Secure Memory Encryption */
+-
++#define X86_FEATURE_PTI                       ( 7*32+11) /* Kernel Page Table Isolation enabled */
+ #define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
+ #define X86_FEATURE_INTEL_PT          ( 7*32+15) /* Intel Processor Trace */
+ #define X86_FEATURE_AVX512_4VNNIW     ( 7*32+16) /* AVX-512 Neural Network Instructions */
+@@ -339,5 +339,6 @@
+ #define X86_BUG_SWAPGS_FENCE          X86_BUG(11) /* SWAPGS without input dep on GS */
+ #define X86_BUG_MONITOR                       X86_BUG(12) /* IPI required to wake up remote CPU */
+ #define X86_BUG_AMD_E400              X86_BUG(13) /* CPU is among the affected by Erratum 400 */
++#define X86_BUG_CPU_INSECURE          X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
+ 
+ #endif /* _ASM_X86_CPUFEATURES_H */
+diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
+index 5dff775af7cd..db681152f024 100644
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -42,6 +42,12 @@
+ # define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
+ #endif
+ 
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++# define DISABLE_PTI          0
++#else
++# define DISABLE_PTI          (1 << (X86_FEATURE_PTI & 31))
++#endif
++
+ /*
+  * Make sure to add features to the correct mask
+  */
+@@ -52,7 +58,7 @@
+ #define DISABLED_MASK4        0
+ #define DISABLED_MASK5        0
+ #define DISABLED_MASK6        0
+-#define DISABLED_MASK7        0
++#define DISABLED_MASK7        (DISABLE_PTI)
+ #define DISABLED_MASK8        0
+ #define DISABLED_MASK9        (DISABLE_MPX)
+ #define DISABLED_MASK10       0
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 96171ce46d61..623ba3635793 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -898,6 +898,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
+       }
+ 
+       setup_force_cpu_cap(X86_FEATURE_ALWAYS);
++
++      /* Assume for now that ALL x86 CPUs are insecure */
++      setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
++
+       fpu__init_system(c);
+ }
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0190-x86-mm-pti-Disable-global-pages-if-PAGE_TABLE_ISOLAT.patch b/patches/kernel/0190-x86-mm-pti-Disable-global-pages-if-PAGE_TABLE_ISOLAT.patch

deleted file mode 100644 (file)

index ad59431..0000000
--- a/patches/kernel/0190-x86-mm-pti-Disable-global-pages-if-PAGE_TABLE_ISOLAT.patch
+++ /dev/null
@@ -1,100 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Mon, 4 Dec 2017 15:07:34 +0100
-Subject: [PATCH] x86/mm/pti: Disable global pages if PAGE_TABLE_ISOLATION=y
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Global pages stay in the TLB across context switches.  Since all contexts
-share the same kernel mapping, these mappings are marked as global pages
-so kernel entries in the TLB are not flushed out on a context switch.
-
-But, even having these entries in the TLB opens up something that an
-attacker can use, such as the double-page-fault attack:
-
-   http://www.ieee-security.org/TC/SP2013/papers/4977a191.pdf
-
-That means that even when PAGE_TABLE_ISOLATION switches page tables
-on return to user space the global pages would stay in the TLB cache.
-
-Disable global pages so that kernel TLB entries can be flushed before
-returning to user space. This way, all accesses to kernel addresses from
-userspace result in a TLB miss independent of the existence of a kernel
-mapping.
-
-Suppress global pages via the __supported_pte_mask. The user space
-mappings set PAGE_GLOBAL for the minimal kernel mappings which are
-required for entry/exit. These mappings are set up manually so the
-filtering does not take place.
-
-[ The __supported_pte_mask simplification was written by Thomas Gleixner. ]
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit c313ec66317d421fb5768d78c56abed2dc862264)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit ace78e99d765da1e59f6b151adac6c360c67af7d)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/init.c | 12 +++++++++---
- 1 file changed, 9 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
-index a22c2b95e513..020223420308 100644
---- a/arch/x86/mm/init.c
-+++ b/arch/x86/mm/init.c
-@@ -161,6 +161,12 @@ struct map_range {
- 
- static int page_size_mask;
- 
-+static void enable_global_pages(void)
-+{
-+      if (!static_cpu_has(X86_FEATURE_PTI))
-+              __supported_pte_mask |= _PAGE_GLOBAL;
-+}
-+
- static void __init probe_page_size_mask(void)
- {
-       /*
-@@ -179,11 +185,11 @@ static void __init probe_page_size_mask(void)
-               cr4_set_bits_and_update_boot(X86_CR4_PSE);
- 
-       /* Enable PGE if available */
-+      __supported_pte_mask &= ~_PAGE_GLOBAL;
-       if (boot_cpu_has(X86_FEATURE_PGE)) {
-               cr4_set_bits_and_update_boot(X86_CR4_PGE);
--              __supported_pte_mask |= _PAGE_GLOBAL;
--      } else
--              __supported_pte_mask &= ~_PAGE_GLOBAL;
-+              enable_global_pages();
-+      }
- 
-       /* Enable 1 GB linear kernel mappings if available: */
-       if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
--- 
-2.14.2
-
diff --git a/patches/kernel/0191-x86-mm-pti-Disable-global-pages-if-PAGE_TABLE_ISOLAT.patch b/patches/kernel/0191-x86-mm-pti-Disable-global-pages-if-PAGE_TABLE_ISOLAT.patch

new file mode 100644 (file)

index 0000000..ad59431
--- /dev/null
+++ b/patches/kernel/0191-x86-mm-pti-Disable-global-pages-if-PAGE_TABLE_ISOLAT.patch
@@ -0,0 +1,100 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:34 +0100
+Subject: [PATCH] x86/mm/pti: Disable global pages if PAGE_TABLE_ISOLATION=y
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Global pages stay in the TLB across context switches.  Since all contexts
+share the same kernel mapping, these mappings are marked as global pages
+so kernel entries in the TLB are not flushed out on a context switch.
+
+But, even having these entries in the TLB opens up something that an
+attacker can use, such as the double-page-fault attack:
+
+   http://www.ieee-security.org/TC/SP2013/papers/4977a191.pdf
+
+That means that even when PAGE_TABLE_ISOLATION switches page tables
+on return to user space the global pages would stay in the TLB cache.
+
+Disable global pages so that kernel TLB entries can be flushed before
+returning to user space. This way, all accesses to kernel addresses from
+userspace result in a TLB miss independent of the existence of a kernel
+mapping.
+
+Suppress global pages via the __supported_pte_mask. The user space
+mappings set PAGE_GLOBAL for the minimal kernel mappings which are
+required for entry/exit. These mappings are set up manually so the
+filtering does not take place.
+
+[ The __supported_pte_mask simplification was written by Thomas Gleixner. ]
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit c313ec66317d421fb5768d78c56abed2dc862264)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit ace78e99d765da1e59f6b151adac6c360c67af7d)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/init.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index a22c2b95e513..020223420308 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -161,6 +161,12 @@ struct map_range {
+ 
+ static int page_size_mask;
+ 
++static void enable_global_pages(void)
++{
++      if (!static_cpu_has(X86_FEATURE_PTI))
++              __supported_pte_mask |= _PAGE_GLOBAL;
++}
++
+ static void __init probe_page_size_mask(void)
+ {
+       /*
+@@ -179,11 +185,11 @@ static void __init probe_page_size_mask(void)
+               cr4_set_bits_and_update_boot(X86_CR4_PSE);
+ 
+       /* Enable PGE if available */
++      __supported_pte_mask &= ~_PAGE_GLOBAL;
+       if (boot_cpu_has(X86_FEATURE_PGE)) {
+               cr4_set_bits_and_update_boot(X86_CR4_PGE);
+-              __supported_pte_mask |= _PAGE_GLOBAL;
+-      } else
+-              __supported_pte_mask &= ~_PAGE_GLOBAL;
++              enable_global_pages();
++      }
+ 
+       /* Enable 1 GB linear kernel mappings if available: */
+       if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
+-- 
+2.14.2
+
diff --git a/patches/kernel/0191-x86-mm-pti-Prepare-the-x86-entry-assembly-code-for-e.patch b/patches/kernel/0191-x86-mm-pti-Prepare-the-x86-entry-assembly-code-for-e.patch

deleted file mode 100644 (file)

index c16486b..0000000
--- a/patches/kernel/0191-x86-mm-pti-Prepare-the-x86-entry-assembly-code-for-e.patch
+++ /dev/null
@@ -1,381 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Mon, 4 Dec 2017 15:07:35 +0100
-Subject: [PATCH] x86/mm/pti: Prepare the x86/entry assembly code for
- entry/exit CR3 switching
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-PAGE_TABLE_ISOLATION needs to switch to a different CR3 value when it
-enters the kernel and switch back when it exits.  This essentially needs to
-be done before leaving assembly code.
-
-This is extra challenging because the switching context is tricky: the
-registers that can be clobbered can vary.  It is also hard to store things
-on the stack because there is an established ABI (ptregs) or the stack is
-entirely unsafe to use.
-
-Establish a set of macros that allow changing to the user and kernel CR3
-values.
-
-Interactions with SWAPGS:
-
-  Previous versions of the PAGE_TABLE_ISOLATION code relied on having
-  per-CPU scratch space to save/restore a register that can be used for the
-  CR3 MOV.  The %GS register is used to index into our per-CPU space, so
-  SWAPGS *had* to be done before the CR3 switch.  That scratch space is gone
-  now, but the semantic that SWAPGS must be done before the CR3 MOV is
-  retained.  This is good to keep because it is not that hard to do and it
-  allows to do things like add per-CPU debugging information.
-
-What this does in the NMI code is worth pointing out.  NMIs can interrupt
-*any* context and they can also be nested with NMIs interrupting other
-NMIs.  The comments below ".Lnmi_from_kernel" explain the format of the
-stack during this situation.  Changing the format of this stack is hard.
-Instead of storing the old CR3 value on the stack, this depends on the
-*regular* register save/restore mechanism and then uses %r14 to keep CR3
-during the NMI.  It is callee-saved and will not be clobbered by the C NMI
-handlers that get called.
-
-[ PeterZ: ESPFIX optimization ]
-
-Based-on-code-from: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 8a09317b895f073977346779df52f67c1056d81d)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 313dfb599cf7f8e53fc6f710d15bed60972dcd6f)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/calling.h         | 66 ++++++++++++++++++++++++++++++++++++++++
- arch/x86/entry/entry_64.S        | 45 +++++++++++++++++++++++----
- arch/x86/entry/entry_64_compat.S | 24 ++++++++++++++-
- 3 files changed, 128 insertions(+), 7 deletions(-)
-
-diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
-index 1895a685d3dd..dde6262be0a3 100644
---- a/arch/x86/entry/calling.h
-+++ b/arch/x86/entry/calling.h
-@@ -1,5 +1,7 @@
- #include <linux/jump_label.h>
- #include <asm/unwind_hints.h>
-+#include <asm/cpufeatures.h>
-+#include <asm/page_types.h>
- 
- /*
- 
-@@ -186,6 +188,70 @@ For 32-bit we have the following conventions - kernel is built with
- #endif
- .endm
- 
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+
-+/* PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two halves: */
-+#define PTI_SWITCH_MASK (1<<PAGE_SHIFT)
-+
-+.macro ADJUST_KERNEL_CR3 reg:req
-+      /* Clear "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
-+      andq    $(~PTI_SWITCH_MASK), \reg
-+.endm
-+
-+.macro ADJUST_USER_CR3 reg:req
-+      /* Move CR3 up a page to the user page tables: */
-+      orq     $(PTI_SWITCH_MASK), \reg
-+.endm
-+
-+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
-+      mov     %cr3, \scratch_reg
-+      ADJUST_KERNEL_CR3 \scratch_reg
-+      mov     \scratch_reg, %cr3
-+.endm
-+
-+.macro SWITCH_TO_USER_CR3 scratch_reg:req
-+      mov     %cr3, \scratch_reg
-+      ADJUST_USER_CR3 \scratch_reg
-+      mov     \scratch_reg, %cr3
-+.endm
-+
-+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
-+      movq    %cr3, \scratch_reg
-+      movq    \scratch_reg, \save_reg
-+      /*
-+       * Is the switch bit zero?  This means the address is
-+       * up in real PAGE_TABLE_ISOLATION patches in a moment.
-+       */
-+      testq   $(PTI_SWITCH_MASK), \scratch_reg
-+      jz      .Ldone_\@
-+
-+      ADJUST_KERNEL_CR3 \scratch_reg
-+      movq    \scratch_reg, %cr3
-+
-+.Ldone_\@:
-+.endm
-+
-+.macro RESTORE_CR3 save_reg:req
-+      /*
-+       * The CR3 write could be avoided when not changing its value,
-+       * but would require a CR3 read *and* a scratch register.
-+       */
-+      movq    \save_reg, %cr3
-+.endm
-+
-+#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
-+
-+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
-+.endm
-+.macro SWITCH_TO_USER_CR3 scratch_reg:req
-+.endm
-+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
-+.endm
-+.macro RESTORE_CR3 save_reg:req
-+.endm
-+
-+#endif
-+
- #endif /* CONFIG_X86_64 */
- 
- /*
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 03e052f02176..292ccc6ec48d 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -163,6 +163,9 @@ ENTRY(entry_SYSCALL_64_trampoline)
-       /* Stash the user RSP. */
-       movq    %rsp, RSP_SCRATCH
- 
-+      /* Note: using %rsp as a scratch reg. */
-+      SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
-+
-       /* Load the top of the task stack into RSP */
-       movq    CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
- 
-@@ -202,6 +205,10 @@ ENTRY(entry_SYSCALL_64)
-        */
- 
-       swapgs
-+      /*
-+       * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
-+       * is not required to switch CR3.
-+       */
-       movq    %rsp, PER_CPU_VAR(rsp_scratch)
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- 
-@@ -398,6 +405,7 @@ syscall_return_via_sysret:
-        * We are on the trampoline stack.  All regs except RDI are live.
-        * We can do future final exit work right here.
-        */
-+      SWITCH_TO_USER_CR3 scratch_reg=%rdi
- 
-       popq    %rdi
-       popq    %rsp
-@@ -735,6 +743,8 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
-        * We can do future final exit work right here.
-        */
- 
-+      SWITCH_TO_USER_CR3 scratch_reg=%rdi
-+
-       /* Restore RDI. */
-       popq    %rdi
-       SWAPGS
-@@ -817,7 +827,9 @@ native_irq_return_ldt:
-        */
- 
-       pushq   %rdi                            /* Stash user RDI */
--      SWAPGS
-+      SWAPGS                                  /* to kernel GS */
-+      SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi   /* to kernel CR3 */
-+
-       movq    PER_CPU_VAR(espfix_waddr), %rdi
-       movq    %rax, (0*8)(%rdi)               /* user RAX */
-       movq    (1*8)(%rsp), %rax               /* user RIP */
-@@ -833,7 +845,6 @@ native_irq_return_ldt:
-       /* Now RAX == RSP. */
- 
-       andl    $0xffff0000, %eax               /* RAX = (RSP & 0xffff0000) */
--      popq    %rdi                            /* Restore user RDI */
- 
-       /*
-        * espfix_stack[31:16] == 0.  The page tables are set up such that
-@@ -844,7 +855,11 @@ native_irq_return_ldt:
-        * still points to an RO alias of the ESPFIX stack.
-        */
-       orq     PER_CPU_VAR(espfix_stack), %rax
--      SWAPGS
-+
-+      SWITCH_TO_USER_CR3 scratch_reg=%rdi     /* to user CR3 */
-+      SWAPGS                                  /* to user GS */
-+      popq    %rdi                            /* Restore user RDI */
-+
-       movq    %rax, %rsp
-       UNWIND_HINT_IRET_REGS offset=8
- 
-@@ -957,6 +972,8 @@ ENTRY(switch_to_thread_stack)
-       UNWIND_HINT_FUNC
- 
-       pushq   %rdi
-+      /* Need to switch before accessing the thread stack. */
-+      SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
-       movq    %rsp, %rdi
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-       UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
-@@ -1256,7 +1273,11 @@ ENTRY(paranoid_entry)
-       js      1f                              /* negative -> in kernel */
-       SWAPGS
-       xorl    %ebx, %ebx
--1:    ret
-+
-+1:
-+      SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
-+
-+      ret
- END(paranoid_entry)
- 
- /*
-@@ -1278,6 +1299,7 @@ ENTRY(paranoid_exit)
-       testl   %ebx, %ebx                      /* swapgs needed? */
-       jnz     .Lparanoid_exit_no_swapgs
-       TRACE_IRQS_IRETQ
-+      RESTORE_CR3     save_reg=%r14
-       SWAPGS_UNSAFE_STACK
-       jmp     .Lparanoid_exit_restore
- .Lparanoid_exit_no_swapgs:
-@@ -1305,6 +1327,8 @@ ENTRY(error_entry)
-        * from user mode due to an IRET fault.
-        */
-       SWAPGS
-+      /* We have user CR3.  Change to kernel CR3. */
-+      SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
- 
- .Lerror_entry_from_usermode_after_swapgs:
-       /* Put us onto the real thread stack. */
-@@ -1351,6 +1375,7 @@ ENTRY(error_entry)
-        * .Lgs_change's error handler with kernel gsbase.
-        */
-       SWAPGS
-+      SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
-       jmp .Lerror_entry_done
- 
- .Lbstep_iret:
-@@ -1360,10 +1385,11 @@ ENTRY(error_entry)
- 
- .Lerror_bad_iret:
-       /*
--       * We came from an IRET to user mode, so we have user gsbase.
--       * Switch to kernel gsbase:
-+       * We came from an IRET to user mode, so we have user
-+       * gsbase and CR3.  Switch to kernel gsbase and CR3:
-        */
-       SWAPGS
-+      SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
- 
-       /*
-        * Pretend that the exception came from user mode: set up pt_regs
-@@ -1395,6 +1421,10 @@ END(error_exit)
- /*
-  * Runs on exception stack.  Xen PV does not go through this path at all,
-  * so we can use real assembly here.
-+ *
-+ * Registers:
-+ *    %r14: Used to save/restore the CR3 of the interrupted context
-+ *          when PAGE_TABLE_ISOLATION is in use.  Do not clobber.
-  */
- ENTRY(nmi)
-       UNWIND_HINT_IRET_REGS
-@@ -1458,6 +1488,7 @@ ENTRY(nmi)
- 
-       swapgs
-       cld
-+      SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
-       movq    %rsp, %rdx
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-       UNWIND_HINT_IRET_REGS base=%rdx offset=8
-@@ -1710,6 +1741,8 @@ end_repeat_nmi:
-       movq    $-1, %rsi
-       call    do_nmi
- 
-+      RESTORE_CR3 save_reg=%r14
-+
-       testl   %ebx, %ebx                      /* swapgs needed? */
-       jnz     nmi_restore
- nmi_swapgs:
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
-index 2270601b6218..43f856aeee67 100644
---- a/arch/x86/entry/entry_64_compat.S
-+++ b/arch/x86/entry/entry_64_compat.S
-@@ -48,6 +48,10 @@
- ENTRY(entry_SYSENTER_compat)
-       /* Interrupts are off on entry. */
-       SWAPGS
-+
-+      /* We are about to clobber %rsp anyway, clobbering here is OK */
-+      SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
-+
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- 
-       /*
-@@ -214,6 +218,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
-       pushq   $0                      /* pt_regs->r14 = 0 */
-       pushq   $0                      /* pt_regs->r15 = 0 */
- 
-+      /*
-+       * We just saved %rdi so it is safe to clobber.  It is not
-+       * preserved during the C calls inside TRACE_IRQS_OFF anyway.
-+       */
-+      SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
-+
-       /*
-        * User mode is traced as though IRQs are on, and SYSENTER
-        * turned them off.
-@@ -255,10 +265,22 @@ sysret32_from_system_call:
-        * when the system call started, which is already known to user
-        * code.  We zero R8-R10 to avoid info leaks.
-          */
-+      movq    RSP-ORIG_RAX(%rsp), %rsp
-+
-+      /*
-+       * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
-+       * on the process stack which is not mapped to userspace and
-+       * not readable after we SWITCH_TO_USER_CR3.  Delay the CR3
-+       * switch until after after the last reference to the process
-+       * stack.
-+       *
-+       * %r8 is zeroed before the sysret, thus safe to clobber.
-+       */
-+      SWITCH_TO_USER_CR3 scratch_reg=%r8
-+
-       xorq    %r8, %r8
-       xorq    %r9, %r9
-       xorq    %r10, %r10
--      movq    RSP-ORIG_RAX(%rsp), %rsp
-       swapgs
-       sysretl
- END(entry_SYSCALL_compat)
--- 
-2.14.2
-
diff --git a/patches/kernel/0192-x86-mm-pti-Add-infrastructure-for-page-table-isolati.patch b/patches/kernel/0192-x86-mm-pti-Add-infrastructure-for-page-table-isolati.patch

deleted file mode 100644 (file)

index b84d5ac..0000000
--- a/patches/kernel/0192-x86-mm-pti-Add-infrastructure-for-page-table-isolati.patch
+++ /dev/null
@@ -1,311 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon, 4 Dec 2017 15:07:36 +0100
-Subject: [PATCH] x86/mm/pti: Add infrastructure for page table isolation
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Add the initial files for kernel page table isolation, with a minimal init
-function and the boot time detection for this misfeature.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit aa8c6248f8c75acfd610fe15d8cae23cf70d9d09)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 50da124a01ed7a59f9b2c9551f622c5a27d1caec)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/admin-guide/kernel-parameters.txt |  2 +
- arch/x86/mm/Makefile                            |  7 ++-
- arch/x86/entry/calling.h                        |  7 +++
- arch/x86/include/asm/pti.h                      | 14 +++++
- include/linux/pti.h                             | 11 ++++
- arch/x86/boot/compressed/pagetable.c            |  3 +
- arch/x86/mm/init.c                              |  2 +
- arch/x86/mm/pti.c                               | 84 +++++++++++++++++++++++++
- init/main.c                                     |  3 +
- 9 files changed, 130 insertions(+), 3 deletions(-)
- create mode 100644 arch/x86/include/asm/pti.h
- create mode 100644 include/linux/pti.h
- create mode 100644 arch/x86/mm/pti.c
-
-diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
-index 3510e255ef4c..e2a4608da5d2 100644
---- a/Documentation/admin-guide/kernel-parameters.txt
-+++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -2677,6 +2677,8 @@
-                       steal time is computed, but won't influence scheduler
-                       behaviour
- 
-+      nopti           [X86-64] Disable kernel page table isolation
-+
-       nolapic         [X86-32,APIC] Do not enable or use the local APIC.
- 
-       nolapic_timer   [X86-32,APIC] Do not use the local APIC timer.
-diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
-index 76f5399a8356..7aa68fc18abe 100644
---- a/arch/x86/mm/Makefile
-+++ b/arch/x86/mm/Makefile
-@@ -35,7 +35,8 @@ obj-$(CONFIG_AMD_NUMA)               += amdtopology.o
- obj-$(CONFIG_ACPI_NUMA)               += srat.o
- obj-$(CONFIG_NUMA_EMU)                += numa_emulation.o
- 
--obj-$(CONFIG_X86_INTEL_MPX)   += mpx.o
--obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
--obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
-+obj-$(CONFIG_X86_INTEL_MPX)                   += mpx.o
-+obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS)        += pkeys.o
-+obj-$(CONFIG_RANDOMIZE_MEMORY)                        += kaslr.o
-+obj-$(CONFIG_PAGE_TABLE_ISOLATION)            += pti.o
- 
-diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
-index dde6262be0a3..bb56f5346ae8 100644
---- a/arch/x86/entry/calling.h
-+++ b/arch/x86/entry/calling.h
-@@ -204,18 +204,23 @@ For 32-bit we have the following conventions - kernel is built with
- .endm
- 
- .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
-+      ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
-       mov     %cr3, \scratch_reg
-       ADJUST_KERNEL_CR3 \scratch_reg
-       mov     \scratch_reg, %cr3
-+.Lend_\@:
- .endm
- 
- .macro SWITCH_TO_USER_CR3 scratch_reg:req
-+      ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
-       mov     %cr3, \scratch_reg
-       ADJUST_USER_CR3 \scratch_reg
-       mov     \scratch_reg, %cr3
-+.Lend_\@:
- .endm
- 
- .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
-+      ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
-       movq    %cr3, \scratch_reg
-       movq    \scratch_reg, \save_reg
-       /*
-@@ -232,11 +237,13 @@ For 32-bit we have the following conventions - kernel is built with
- .endm
- 
- .macro RESTORE_CR3 save_reg:req
-+      ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
-       /*
-        * The CR3 write could be avoided when not changing its value,
-        * but would require a CR3 read *and* a scratch register.
-        */
-       movq    \save_reg, %cr3
-+.Lend_\@:
- .endm
- 
- #else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
-diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
-new file mode 100644
-index 000000000000..0b5ef05b2d2d
---- /dev/null
-+++ b/arch/x86/include/asm/pti.h
-@@ -0,0 +1,14 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#ifndef _ASM_X86_PTI_H
-+#define _ASM_X86_PTI_H
-+#ifndef __ASSEMBLY__
-+
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+extern void pti_init(void);
-+extern void pti_check_boottime_disable(void);
-+#else
-+static inline void pti_check_boottime_disable(void) { }
-+#endif
-+
-+#endif /* __ASSEMBLY__ */
-+#endif /* _ASM_X86_PTI_H */
-diff --git a/include/linux/pti.h b/include/linux/pti.h
-new file mode 100644
-index 000000000000..0174883a935a
---- /dev/null
-+++ b/include/linux/pti.h
-@@ -0,0 +1,11 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#ifndef _INCLUDE_PTI_H
-+#define _INCLUDE_PTI_H
-+
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+#include <asm/pti.h>
-+#else
-+static inline void pti_init(void) { }
-+#endif
-+
-+#endif
-diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
-index 28029be47fbb..21d8839cdaa7 100644
---- a/arch/x86/boot/compressed/pagetable.c
-+++ b/arch/x86/boot/compressed/pagetable.c
-@@ -15,6 +15,9 @@
- #define __pa(x)  ((unsigned long)(x))
- #define __va(x)  ((void *)((unsigned long)(x)))
- 
-+/* No PAGE_TABLE_ISOLATION support needed either: */
-+#undef CONFIG_PAGE_TABLE_ISOLATION
-+
- #include "misc.h"
- 
- /* These actually do the work of building the kernel identity maps. */
-diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
-index 020223420308..af75069fb116 100644
---- a/arch/x86/mm/init.c
-+++ b/arch/x86/mm/init.c
-@@ -20,6 +20,7 @@
- #include <asm/kaslr.h>
- #include <asm/hypervisor.h>
- #include <asm/cpufeature.h>
-+#include <asm/pti.h>
- 
- /*
-  * We need to define the tracepoints somewhere, and tlb.c
-@@ -630,6 +631,7 @@ void __init init_mem_mapping(void)
- {
-       unsigned long end;
- 
-+      pti_check_boottime_disable();
-       probe_page_size_mask();
-       setup_pcid();
- 
-diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
-new file mode 100644
-index 000000000000..375f23a758bc
---- /dev/null
-+++ b/arch/x86/mm/pti.c
-@@ -0,0 +1,84 @@
-+/*
-+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of version 2 of the GNU General Public License as
-+ * published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * General Public License for more details.
-+ *
-+ * This code is based in part on work published here:
-+ *
-+ *    https://github.com/IAIK/KAISER
-+ *
-+ * The original work was written by and and signed off by for the Linux
-+ * kernel by:
-+ *
-+ *   Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
-+ *   Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
-+ *   Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
-+ *   Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
-+ *
-+ * Major changes to the original code by: Dave Hansen <dave.hansen@intel.com>
-+ * Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and
-+ *                   Andy Lutomirsky <luto@amacapital.net>
-+ */
-+#include <linux/kernel.h>
-+#include <linux/errno.h>
-+#include <linux/string.h>
-+#include <linux/types.h>
-+#include <linux/bug.h>
-+#include <linux/init.h>
-+#include <linux/spinlock.h>
-+#include <linux/mm.h>
-+#include <linux/uaccess.h>
-+
-+#include <asm/cpufeature.h>
-+#include <asm/hypervisor.h>
-+#include <asm/cmdline.h>
-+#include <asm/pti.h>
-+#include <asm/pgtable.h>
-+#include <asm/pgalloc.h>
-+#include <asm/tlbflush.h>
-+#include <asm/desc.h>
-+
-+#undef pr_fmt
-+#define pr_fmt(fmt)     "Kernel/User page tables isolation: " fmt
-+
-+static void __init pti_print_if_insecure(const char *reason)
-+{
-+      if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
-+              pr_info("%s\n", reason);
-+}
-+
-+void __init pti_check_boottime_disable(void)
-+{
-+      if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
-+              pti_print_if_insecure("disabled on XEN PV.");
-+              return;
-+      }
-+
-+      if (cmdline_find_option_bool(boot_command_line, "nopti")) {
-+              pti_print_if_insecure("disabled on command line.");
-+              return;
-+      }
-+
-+      if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
-+              return;
-+
-+      setup_force_cpu_cap(X86_FEATURE_PTI);
-+}
-+
-+/*
-+ * Initialize kernel page table isolation
-+ */
-+void __init pti_init(void)
-+{
-+      if (!static_cpu_has(X86_FEATURE_PTI))
-+              return;
-+
-+      pr_info("enabled\n");
-+}
-diff --git a/init/main.c b/init/main.c
-index de1c495da782..bb0896c24c08 100644
---- a/init/main.c
-+++ b/init/main.c
-@@ -75,6 +75,7 @@
- #include <linux/slab.h>
- #include <linux/perf_event.h>
- #include <linux/ptrace.h>
-+#include <linux/pti.h>
- #include <linux/blkdev.h>
- #include <linux/elevator.h>
- #include <linux/sched_clock.h>
-@@ -506,6 +507,8 @@ static void __init mm_init(void)
-       ioremap_huge_init();
-       /* Should be run before the first non-init thread is created */
-       init_espfix_bsp();
-+      /* Should be run after espfix64 is set up. */
-+      pti_init();
- }
- 
- asmlinkage __visible void __init start_kernel(void)
--- 
-2.14.2
-
diff --git a/patches/kernel/0192-x86-mm-pti-Prepare-the-x86-entry-assembly-code-for-e.patch b/patches/kernel/0192-x86-mm-pti-Prepare-the-x86-entry-assembly-code-for-e.patch

new file mode 100644 (file)

index 0000000..c16486b
--- /dev/null
+++ b/patches/kernel/0192-x86-mm-pti-Prepare-the-x86-entry-assembly-code-for-e.patch
@@ -0,0 +1,381 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:35 +0100
+Subject: [PATCH] x86/mm/pti: Prepare the x86/entry assembly code for
+ entry/exit CR3 switching
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+PAGE_TABLE_ISOLATION needs to switch to a different CR3 value when it
+enters the kernel and switch back when it exits.  This essentially needs to
+be done before leaving assembly code.
+
+This is extra challenging because the switching context is tricky: the
+registers that can be clobbered can vary.  It is also hard to store things
+on the stack because there is an established ABI (ptregs) or the stack is
+entirely unsafe to use.
+
+Establish a set of macros that allow changing to the user and kernel CR3
+values.
+
+Interactions with SWAPGS:
+
+  Previous versions of the PAGE_TABLE_ISOLATION code relied on having
+  per-CPU scratch space to save/restore a register that can be used for the
+  CR3 MOV.  The %GS register is used to index into our per-CPU space, so
+  SWAPGS *had* to be done before the CR3 switch.  That scratch space is gone
+  now, but the semantic that SWAPGS must be done before the CR3 MOV is
+  retained.  This is good to keep because it is not that hard to do and it
+  allows to do things like add per-CPU debugging information.
+
+What this does in the NMI code is worth pointing out.  NMIs can interrupt
+*any* context and they can also be nested with NMIs interrupting other
+NMIs.  The comments below ".Lnmi_from_kernel" explain the format of the
+stack during this situation.  Changing the format of this stack is hard.
+Instead of storing the old CR3 value on the stack, this depends on the
+*regular* register save/restore mechanism and then uses %r14 to keep CR3
+during the NMI.  It is callee-saved and will not be clobbered by the C NMI
+handlers that get called.
+
+[ PeterZ: ESPFIX optimization ]
+
+Based-on-code-from: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 8a09317b895f073977346779df52f67c1056d81d)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 313dfb599cf7f8e53fc6f710d15bed60972dcd6f)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/calling.h         | 66 ++++++++++++++++++++++++++++++++++++++++
+ arch/x86/entry/entry_64.S        | 45 +++++++++++++++++++++++----
+ arch/x86/entry/entry_64_compat.S | 24 ++++++++++++++-
+ 3 files changed, 128 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
+index 1895a685d3dd..dde6262be0a3 100644
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -1,5 +1,7 @@
+ #include <linux/jump_label.h>
+ #include <asm/unwind_hints.h>
++#include <asm/cpufeatures.h>
++#include <asm/page_types.h>
+ 
+ /*
+ 
+@@ -186,6 +188,70 @@ For 32-bit we have the following conventions - kernel is built with
+ #endif
+ .endm
+ 
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++
++/* PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two halves: */
++#define PTI_SWITCH_MASK (1<<PAGE_SHIFT)
++
++.macro ADJUST_KERNEL_CR3 reg:req
++      /* Clear "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
++      andq    $(~PTI_SWITCH_MASK), \reg
++.endm
++
++.macro ADJUST_USER_CR3 reg:req
++      /* Move CR3 up a page to the user page tables: */
++      orq     $(PTI_SWITCH_MASK), \reg
++.endm
++
++.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
++      mov     %cr3, \scratch_reg
++      ADJUST_KERNEL_CR3 \scratch_reg
++      mov     \scratch_reg, %cr3
++.endm
++
++.macro SWITCH_TO_USER_CR3 scratch_reg:req
++      mov     %cr3, \scratch_reg
++      ADJUST_USER_CR3 \scratch_reg
++      mov     \scratch_reg, %cr3
++.endm
++
++.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
++      movq    %cr3, \scratch_reg
++      movq    \scratch_reg, \save_reg
++      /*
++       * Is the switch bit zero?  Then CR3 already points at the
++       * kernel page tables and no switch is needed.
++       */
++      testq   $(PTI_SWITCH_MASK), \scratch_reg
++      jz      .Ldone_\@
++
++      ADJUST_KERNEL_CR3 \scratch_reg
++      movq    \scratch_reg, %cr3
++
++.Ldone_\@:
++.endm
++
++.macro RESTORE_CR3 save_reg:req
++      /*
++       * The CR3 write could be avoided when not changing its value,
++       * but would require a CR3 read *and* a scratch register.
++       */
++      movq    \save_reg, %cr3
++.endm
++
++#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
++
++.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
++.endm
++.macro SWITCH_TO_USER_CR3 scratch_reg:req
++.endm
++.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
++.endm
++.macro RESTORE_CR3 save_reg:req
++.endm
++
++#endif
++
+ #endif /* CONFIG_X86_64 */
+ 
+ /*
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 03e052f02176..292ccc6ec48d 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -163,6 +163,9 @@ ENTRY(entry_SYSCALL_64_trampoline)
+       /* Stash the user RSP. */
+       movq    %rsp, RSP_SCRATCH
+ 
++      /* Note: using %rsp as a scratch reg. */
++      SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
++
+       /* Load the top of the task stack into RSP */
+       movq    CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+ 
+@@ -202,6 +205,10 @@ ENTRY(entry_SYSCALL_64)
+        */
+ 
+       swapgs
++      /*
++       * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
++       * is not required to switch CR3.
++       */
+       movq    %rsp, PER_CPU_VAR(rsp_scratch)
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ 
+@@ -398,6 +405,7 @@ syscall_return_via_sysret:
+        * We are on the trampoline stack.  All regs except RDI are live.
+        * We can do future final exit work right here.
+        */
++      SWITCH_TO_USER_CR3 scratch_reg=%rdi
+ 
+       popq    %rdi
+       popq    %rsp
+@@ -735,6 +743,8 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+        * We can do future final exit work right here.
+        */
+ 
++      SWITCH_TO_USER_CR3 scratch_reg=%rdi
++
+       /* Restore RDI. */
+       popq    %rdi
+       SWAPGS
+@@ -817,7 +827,9 @@ native_irq_return_ldt:
+        */
+ 
+       pushq   %rdi                            /* Stash user RDI */
+-      SWAPGS
++      SWAPGS                                  /* to kernel GS */
++      SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi   /* to kernel CR3 */
++
+       movq    PER_CPU_VAR(espfix_waddr), %rdi
+       movq    %rax, (0*8)(%rdi)               /* user RAX */
+       movq    (1*8)(%rsp), %rax               /* user RIP */
+@@ -833,7 +845,6 @@ native_irq_return_ldt:
+       /* Now RAX == RSP. */
+ 
+       andl    $0xffff0000, %eax               /* RAX = (RSP & 0xffff0000) */
+-      popq    %rdi                            /* Restore user RDI */
+ 
+       /*
+        * espfix_stack[31:16] == 0.  The page tables are set up such that
+@@ -844,7 +855,11 @@ native_irq_return_ldt:
+        * still points to an RO alias of the ESPFIX stack.
+        */
+       orq     PER_CPU_VAR(espfix_stack), %rax
+-      SWAPGS
++
++      SWITCH_TO_USER_CR3 scratch_reg=%rdi     /* to user CR3 */
++      SWAPGS                                  /* to user GS */
++      popq    %rdi                            /* Restore user RDI */
++
+       movq    %rax, %rsp
+       UNWIND_HINT_IRET_REGS offset=8
+ 
+@@ -957,6 +972,8 @@ ENTRY(switch_to_thread_stack)
+       UNWIND_HINT_FUNC
+ 
+       pushq   %rdi
++      /* Need to switch before accessing the thread stack. */
++      SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+       UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
+@@ -1256,7 +1273,11 @@ ENTRY(paranoid_entry)
+       js      1f                              /* negative -> in kernel */
+       SWAPGS
+       xorl    %ebx, %ebx
+-1:    ret
++
++1:
++      SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
++
++      ret
+ END(paranoid_entry)
+ 
+ /*
+@@ -1278,6 +1299,7 @@ ENTRY(paranoid_exit)
+       testl   %ebx, %ebx                      /* swapgs needed? */
+       jnz     .Lparanoid_exit_no_swapgs
+       TRACE_IRQS_IRETQ
++      RESTORE_CR3     save_reg=%r14
+       SWAPGS_UNSAFE_STACK
+       jmp     .Lparanoid_exit_restore
+ .Lparanoid_exit_no_swapgs:
+@@ -1305,6 +1327,8 @@ ENTRY(error_entry)
+        * from user mode due to an IRET fault.
+        */
+       SWAPGS
++      /* We have user CR3.  Change to kernel CR3. */
++      SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ 
+ .Lerror_entry_from_usermode_after_swapgs:
+       /* Put us onto the real thread stack. */
+@@ -1351,6 +1375,7 @@ ENTRY(error_entry)
+        * .Lgs_change's error handler with kernel gsbase.
+        */
+       SWAPGS
++      SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+       jmp .Lerror_entry_done
+ 
+ .Lbstep_iret:
+@@ -1360,10 +1385,11 @@ ENTRY(error_entry)
+ 
+ .Lerror_bad_iret:
+       /*
+-       * We came from an IRET to user mode, so we have user gsbase.
+-       * Switch to kernel gsbase:
++       * We came from an IRET to user mode, so we have user
++       * gsbase and CR3.  Switch to kernel gsbase and CR3:
+        */
+       SWAPGS
++      SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ 
+       /*
+        * Pretend that the exception came from user mode: set up pt_regs
+@@ -1395,6 +1421,10 @@ END(error_exit)
+ /*
+  * Runs on exception stack.  Xen PV does not go through this path at all,
+  * so we can use real assembly here.
++ *
++ * Registers:
++ *    %r14: Used to save/restore the CR3 of the interrupted context
++ *          when PAGE_TABLE_ISOLATION is in use.  Do not clobber.
+  */
+ ENTRY(nmi)
+       UNWIND_HINT_IRET_REGS
+@@ -1458,6 +1488,7 @@ ENTRY(nmi)
+ 
+       swapgs
+       cld
++      SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
+       movq    %rsp, %rdx
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+       UNWIND_HINT_IRET_REGS base=%rdx offset=8
+@@ -1710,6 +1741,8 @@ end_repeat_nmi:
+       movq    $-1, %rsi
+       call    do_nmi
+ 
++      RESTORE_CR3 save_reg=%r14
++
+       testl   %ebx, %ebx                      /* swapgs needed? */
+       jnz     nmi_restore
+ nmi_swapgs:
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index 2270601b6218..43f856aeee67 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -48,6 +48,10 @@
+ ENTRY(entry_SYSENTER_compat)
+       /* Interrupts are off on entry. */
+       SWAPGS
++
++      /* We are about to clobber %rsp anyway, clobbering here is OK */
++      SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
++
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ 
+       /*
+@@ -214,6 +218,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
+       pushq   $0                      /* pt_regs->r14 = 0 */
+       pushq   $0                      /* pt_regs->r15 = 0 */
+ 
++      /*
++       * We just saved %rdi so it is safe to clobber.  It is not
++       * preserved during the C calls inside TRACE_IRQS_OFF anyway.
++       */
++      SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
++
+       /*
+        * User mode is traced as though IRQs are on, and SYSENTER
+        * turned them off.
+@@ -255,10 +265,22 @@ sysret32_from_system_call:
+        * when the system call started, which is already known to user
+        * code.  We zero R8-R10 to avoid info leaks.
+          */
++      movq    RSP-ORIG_RAX(%rsp), %rsp
++
++      /*
++       * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
++       * on the process stack which is not mapped to userspace and
++       * not readable after we SWITCH_TO_USER_CR3.  Delay the CR3
++       * switch until after the last reference to the process
++       * stack.
++       *
++       * %r8 is zeroed before the sysret, thus safe to clobber.
++       */
++      SWITCH_TO_USER_CR3 scratch_reg=%r8
++
+       xorq    %r8, %r8
+       xorq    %r9, %r9
+       xorq    %r10, %r10
+-      movq    RSP-ORIG_RAX(%rsp), %rsp
+       swapgs
+       sysretl
+ END(entry_SYSCALL_compat)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0193-x86-mm-pti-Add-infrastructure-for-page-table-isolati.patch b/patches/kernel/0193-x86-mm-pti-Add-infrastructure-for-page-table-isolati.patch

new file mode 100644 (file)

index 0000000..b84d5ac
--- /dev/null
+++ b/patches/kernel/0193-x86-mm-pti-Add-infrastructure-for-page-table-isolati.patch
@@ -0,0 +1,311 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:36 +0100
+Subject: [PATCH] x86/mm/pti: Add infrastructure for page table isolation
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Add the initial files for kernel page table isolation, with a minimal init
+function and the boot time detection for this misfeature.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit aa8c6248f8c75acfd610fe15d8cae23cf70d9d09)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 50da124a01ed7a59f9b2c9551f622c5a27d1caec)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/admin-guide/kernel-parameters.txt |  2 +
+ arch/x86/mm/Makefile                            |  7 ++-
+ arch/x86/entry/calling.h                        |  7 +++
+ arch/x86/include/asm/pti.h                      | 14 +++++
+ include/linux/pti.h                             | 11 ++++
+ arch/x86/boot/compressed/pagetable.c            |  3 +
+ arch/x86/mm/init.c                              |  2 +
+ arch/x86/mm/pti.c                               | 84 +++++++++++++++++++++++++
+ init/main.c                                     |  3 +
+ 9 files changed, 130 insertions(+), 3 deletions(-)
+ create mode 100644 arch/x86/include/asm/pti.h
+ create mode 100644 include/linux/pti.h
+ create mode 100644 arch/x86/mm/pti.c
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 3510e255ef4c..e2a4608da5d2 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -2677,6 +2677,8 @@
+                       steal time is computed, but won't influence scheduler
+                       behaviour
+ 
++      nopti           [X86-64] Disable kernel page table isolation
++
+       nolapic         [X86-32,APIC] Do not enable or use the local APIC.
+ 
+       nolapic_timer   [X86-32,APIC] Do not use the local APIC timer.
+diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
+index 76f5399a8356..7aa68fc18abe 100644
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -35,7 +35,8 @@ obj-$(CONFIG_AMD_NUMA)               += amdtopology.o
+ obj-$(CONFIG_ACPI_NUMA)               += srat.o
+ obj-$(CONFIG_NUMA_EMU)                += numa_emulation.o
+ 
+-obj-$(CONFIG_X86_INTEL_MPX)   += mpx.o
+-obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
+-obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
++obj-$(CONFIG_X86_INTEL_MPX)                   += mpx.o
++obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS)        += pkeys.o
++obj-$(CONFIG_RANDOMIZE_MEMORY)                        += kaslr.o
++obj-$(CONFIG_PAGE_TABLE_ISOLATION)            += pti.o
+ 
+diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
+index dde6262be0a3..bb56f5346ae8 100644
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -204,18 +204,23 @@ For 32-bit we have the following conventions - kernel is built with
+ .endm
+ 
+ .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
++      ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+       mov     %cr3, \scratch_reg
+       ADJUST_KERNEL_CR3 \scratch_reg
+       mov     \scratch_reg, %cr3
++.Lend_\@:
+ .endm
+ 
+ .macro SWITCH_TO_USER_CR3 scratch_reg:req
++      ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+       mov     %cr3, \scratch_reg
+       ADJUST_USER_CR3 \scratch_reg
+       mov     \scratch_reg, %cr3
++.Lend_\@:
+ .endm
+ 
+ .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
++      ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
+       movq    %cr3, \scratch_reg
+       movq    \scratch_reg, \save_reg
+       /*
+@@ -232,11 +237,13 @@ For 32-bit we have the following conventions - kernel is built with
+ .endm
+ 
+ .macro RESTORE_CR3 save_reg:req
++      ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+       /*
+        * The CR3 write could be avoided when not changing its value,
+        * but would require a CR3 read *and* a scratch register.
+        */
+       movq    \save_reg, %cr3
++.Lend_\@:
+ .endm
+ 
+ #else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
+diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
+new file mode 100644
+index 000000000000..0b5ef05b2d2d
+--- /dev/null
++++ b/arch/x86/include/asm/pti.h
+@@ -0,0 +1,14 @@
++// SPDX-License-Identifier: GPL-2.0
++#ifndef _ASM_X86_PTI_H
++#define _ASM_X86_PTI_H
++#ifndef __ASSEMBLY__
++
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++extern void pti_init(void);
++extern void pti_check_boottime_disable(void);
++#else
++static inline void pti_check_boottime_disable(void) { }
++#endif
++
++#endif /* __ASSEMBLY__ */
++#endif /* _ASM_X86_PTI_H */
+diff --git a/include/linux/pti.h b/include/linux/pti.h
+new file mode 100644
+index 000000000000..0174883a935a
+--- /dev/null
++++ b/include/linux/pti.h
+@@ -0,0 +1,11 @@
++// SPDX-License-Identifier: GPL-2.0
++#ifndef _INCLUDE_PTI_H
++#define _INCLUDE_PTI_H
++
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++#include <asm/pti.h>
++#else
++static inline void pti_init(void) { }
++#endif
++
++#endif
+diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
+index 28029be47fbb..21d8839cdaa7 100644
+--- a/arch/x86/boot/compressed/pagetable.c
++++ b/arch/x86/boot/compressed/pagetable.c
+@@ -15,6 +15,9 @@
+ #define __pa(x)  ((unsigned long)(x))
+ #define __va(x)  ((void *)((unsigned long)(x)))
+ 
++/* No PAGE_TABLE_ISOLATION support needed either: */
++#undef CONFIG_PAGE_TABLE_ISOLATION
++
+ #include "misc.h"
+ 
+ /* These actually do the work of building the kernel identity maps. */
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index 020223420308..af75069fb116 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -20,6 +20,7 @@
+ #include <asm/kaslr.h>
+ #include <asm/hypervisor.h>
+ #include <asm/cpufeature.h>
++#include <asm/pti.h>
+ 
+ /*
+  * We need to define the tracepoints somewhere, and tlb.c
+@@ -630,6 +631,7 @@ void __init init_mem_mapping(void)
+ {
+       unsigned long end;
+ 
++      pti_check_boottime_disable();
+       probe_page_size_mask();
+       setup_pcid();
+ 
+diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
+new file mode 100644
+index 000000000000..375f23a758bc
+--- /dev/null
++++ b/arch/x86/mm/pti.c
+@@ -0,0 +1,84 @@
++/*
++ * Copyright(c) 2017 Intel Corporation. All rights reserved.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of version 2 of the GNU General Public License as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * General Public License for more details.
++ *
++ * This code is based in part on work published here:
++ *
++ *    https://github.com/IAIK/KAISER
++ *
++ * The original work was written and signed off for the Linux
++ * kernel by:
++ *
++ *   Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
++ *   Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
++ *   Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
++ *   Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
++ *
++ * Major changes to the original code by: Dave Hansen <dave.hansen@intel.com>
++ * Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and
++ *                   Andy Lutomirski <luto@amacapital.net>
++ */
++#include <linux/kernel.h>
++#include <linux/errno.h>
++#include <linux/string.h>
++#include <linux/types.h>
++#include <linux/bug.h>
++#include <linux/init.h>
++#include <linux/spinlock.h>
++#include <linux/mm.h>
++#include <linux/uaccess.h>
++
++#include <asm/cpufeature.h>
++#include <asm/hypervisor.h>
++#include <asm/cmdline.h>
++#include <asm/pti.h>
++#include <asm/pgtable.h>
++#include <asm/pgalloc.h>
++#include <asm/tlbflush.h>
++#include <asm/desc.h>
++
++#undef pr_fmt
++#define pr_fmt(fmt)     "Kernel/User page tables isolation: " fmt
++
++static void __init pti_print_if_insecure(const char *reason)
++{
++      if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
++              pr_info("%s\n", reason);
++}
++
++void __init pti_check_boottime_disable(void)
++{
++      if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
++              pti_print_if_insecure("disabled on XEN PV.");
++              return;
++      }
++
++      if (cmdline_find_option_bool(boot_command_line, "nopti")) {
++              pti_print_if_insecure("disabled on command line.");
++              return;
++      }
++
++      if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
++              return;
++
++      setup_force_cpu_cap(X86_FEATURE_PTI);
++}
++
++/*
++ * Initialize kernel page table isolation
++ */
++void __init pti_init(void)
++{
++      if (!static_cpu_has(X86_FEATURE_PTI))
++              return;
++
++      pr_info("enabled\n");
++}
+diff --git a/init/main.c b/init/main.c
+index de1c495da782..bb0896c24c08 100644
+--- a/init/main.c
++++ b/init/main.c
+@@ -75,6 +75,7 @@
+ #include <linux/slab.h>
+ #include <linux/perf_event.h>
+ #include <linux/ptrace.h>
++#include <linux/pti.h>
+ #include <linux/blkdev.h>
+ #include <linux/elevator.h>
+ #include <linux/sched_clock.h>
+@@ -506,6 +507,8 @@ static void __init mm_init(void)
+       ioremap_huge_init();
+       /* Should be run before the first non-init thread is created */
+       init_espfix_bsp();
++      /* Should be run after espfix64 is set up. */
++      pti_init();
+ }
+ 
+ asmlinkage __visible void __init start_kernel(void)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0193-x86-pti-Add-the-pti-cmdline-option-and-documentation.patch b/patches/kernel/0193-x86-pti-Add-the-pti-cmdline-option-and-documentation.patch

deleted file mode 100644 (file)

index e5b25e7..0000000
--- a/patches/kernel/0193-x86-pti-Add-the-pti-cmdline-option-and-documentation.patch
+++ /dev/null
@@ -1,122 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Borislav Petkov <bp@suse.de>
-Date: Tue, 12 Dec 2017 14:39:52 +0100
-Subject: [PATCH] x86/pti: Add the pti= cmdline option and documentation
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Keep the "nopti" optional for traditional reasons.
-
-[ tglx: Don't allow force on when running on XEN PV and made 'on'
-       printout conditional ]
-
-Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
-Signed-off-by: Borislav Petkov <bp@suse.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Andy Lutomirsky <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Link: https://lkml.kernel.org/r/20171212133952.10177-1-bp@alien8.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 41f4c20b57a4890ea7f56ff8717cc83fefb8d537)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 96d3670fa8f88989fb7c0be5172a1378143f3296)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/admin-guide/kernel-parameters.txt |  6 ++++++
- arch/x86/mm/pti.c                               | 26 ++++++++++++++++++++++++-
- 2 files changed, 31 insertions(+), 1 deletion(-)
-
-diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
-index e2a4608da5d2..b4d2edf316db 100644
---- a/Documentation/admin-guide/kernel-parameters.txt
-+++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -3247,6 +3247,12 @@
-       pt.             [PARIDE]
-                       See Documentation/blockdev/paride.txt.
- 
-+      pti=            [X86_64]
-+                      Control user/kernel address space isolation:
-+                      on - enable
-+                      off - disable
-+                      auto - default setting
-+
-       pty.legacy_count=
-                       [KNL] Number of legacy pty's. Overwrites compiled-in
-                       default number.
-diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
-index 375f23a758bc..a13f6b109865 100644
---- a/arch/x86/mm/pti.c
-+++ b/arch/x86/mm/pti.c
-@@ -54,21 +54,45 @@ static void __init pti_print_if_insecure(const char *reason)
-               pr_info("%s\n", reason);
- }
- 
-+static void __init pti_print_if_secure(const char *reason)
-+{
-+      if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
-+              pr_info("%s\n", reason);
-+}
-+
- void __init pti_check_boottime_disable(void)
- {
-+      char arg[5];
-+      int ret;
-+
-       if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
-               pti_print_if_insecure("disabled on XEN PV.");
-               return;
-       }
- 
-+      ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
-+      if (ret > 0)  {
-+              if (ret == 3 && !strncmp(arg, "off", 3)) {
-+                      pti_print_if_insecure("disabled on command line.");
-+                      return;
-+              }
-+              if (ret == 2 && !strncmp(arg, "on", 2)) {
-+                      pti_print_if_secure("force enabled on command line.");
-+                      goto enable;
-+              }
-+              if (ret == 4 && !strncmp(arg, "auto", 4))
-+                      goto autosel;
-+      }
-+
-       if (cmdline_find_option_bool(boot_command_line, "nopti")) {
-               pti_print_if_insecure("disabled on command line.");
-               return;
-       }
- 
-+autosel:
-       if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
-               return;
--
-+enable:
-       setup_force_cpu_cap(X86_FEATURE_PTI);
- }
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0194-x86-mm-pti-Add-mapping-helper-functions.patch b/patches/kernel/0194-x86-mm-pti-Add-mapping-helper-functions.patch

deleted file mode 100644 (file)

index ee78a97..0000000
--- a/patches/kernel/0194-x86-mm-pti-Add-mapping-helper-functions.patch
+++ /dev/null
@@ -1,235 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Mon, 4 Dec 2017 15:07:37 +0100
-Subject: [PATCH] x86/mm/pti: Add mapping helper functions
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Add the pagetable helper functions do manage the separate user space page
-tables.
-
-[ tglx: Split out from the big combo kaiser patch. Folded Andys
-       simplification and made it out of line as Boris suggested ]
-
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-kernel@vger.kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 61e9b3671007a5da8127955a1a3bda7e0d5f42e8)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit fb45c59197f3134db6e223bb4fec0529774c07e1)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/pgtable.h    |  6 ++-
- arch/x86/include/asm/pgtable_64.h | 92 +++++++++++++++++++++++++++++++++++++++
- arch/x86/mm/pti.c                 | 41 +++++++++++++++++
- 3 files changed, 138 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
-index bb8e9ea7deb4..abbb47c75467 100644
---- a/arch/x86/include/asm/pgtable.h
-+++ b/arch/x86/include/asm/pgtable.h
-@@ -894,7 +894,11 @@ static inline int pgd_none(pgd_t pgd)
-  * pgd_offset() returns a (pgd_t *)
-  * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
-  */
--#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
-+#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
-+/*
-+ * a shortcut to get a pgd_t in a given mm
-+ */
-+#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
- /*
-  * a shortcut which implies the use of the kernel's pgd, instead
-  * of a process's
-diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
-index 2160c1fee920..1ac15b03cf30 100644
---- a/arch/x86/include/asm/pgtable_64.h
-+++ b/arch/x86/include/asm/pgtable_64.h
-@@ -130,9 +130,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
- #endif
- }
- 
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+/*
-+ * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
-+ * (8k-aligned and 8k in size).  The kernel one is at the beginning 4k and
-+ * the user one is in the last 4k.  To switch between them, you
-+ * just need to flip the 12th bit in their addresses.
-+ */
-+#define PTI_PGTABLE_SWITCH_BIT        PAGE_SHIFT
-+
-+/*
-+ * This generates better code than the inline assembly in
-+ * __set_bit().
-+ */
-+static inline void *ptr_set_bit(void *ptr, int bit)
-+{
-+      unsigned long __ptr = (unsigned long)ptr;
-+
-+      __ptr |= BIT(bit);
-+      return (void *)__ptr;
-+}
-+static inline void *ptr_clear_bit(void *ptr, int bit)
-+{
-+      unsigned long __ptr = (unsigned long)ptr;
-+
-+      __ptr &= ~BIT(bit);
-+      return (void *)__ptr;
-+}
-+
-+static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
-+{
-+      return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
-+}
-+
-+static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
-+{
-+      return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
-+}
-+
-+static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
-+{
-+      return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
-+}
-+
-+static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
-+{
-+      return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
-+}
-+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
-+
-+/*
-+ * Page table pages are page-aligned.  The lower half of the top
-+ * level is used for userspace and the top half for the kernel.
-+ *
-+ * Returns true for parts of the PGD that map userspace and
-+ * false for the parts that map the kernel.
-+ */
-+static inline bool pgdp_maps_userspace(void *__ptr)
-+{
-+      unsigned long ptr = (unsigned long)__ptr;
-+
-+      return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
-+}
-+
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
-+
-+/*
-+ * Take a PGD location (pgdp) and a pgd value that needs to be set there.
-+ * Populates the user and returns the resulting PGD that must be set in
-+ * the kernel copy of the page tables.
-+ */
-+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
-+{
-+      if (!static_cpu_has(X86_FEATURE_PTI))
-+              return pgd;
-+      return __pti_set_user_pgd(pgdp, pgd);
-+}
-+#else
-+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
-+{
-+      return pgd;
-+}
-+#endif
-+
- static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
- {
-+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
-+      p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
-+#else
-       *p4dp = p4d;
-+#endif
- }
- 
- static inline void native_p4d_clear(p4d_t *p4d)
-@@ -146,7 +234,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
- 
- static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
- {
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+      *pgdp = pti_set_user_pgd(pgdp, pgd);
-+#else
-       *pgdp = pgd;
-+#endif
- }
- 
- static inline void native_pgd_clear(pgd_t *pgd)
-diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
-index a13f6b109865..69a983365392 100644
---- a/arch/x86/mm/pti.c
-+++ b/arch/x86/mm/pti.c
-@@ -96,6 +96,47 @@ void __init pti_check_boottime_disable(void)
-       setup_force_cpu_cap(X86_FEATURE_PTI);
- }
- 
-+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
-+{
-+      /*
-+       * Changes to the high (kernel) portion of the kernelmode page
-+       * tables are not automatically propagated to the usermode tables.
-+       *
-+       * Users should keep in mind that, unlike the kernelmode tables,
-+       * there is no vmalloc_fault equivalent for the usermode tables.
-+       * Top-level entries added to init_mm's usermode pgd after boot
-+       * will not be automatically propagated to other mms.
-+       */
-+      if (!pgdp_maps_userspace(pgdp))
-+              return pgd;
-+
-+      /*
-+       * The user page tables get the full PGD, accessible from
-+       * userspace:
-+       */
-+      kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
-+
-+      /*
-+       * If this is normal user memory, make it NX in the kernel
-+       * pagetables so that, if we somehow screw up and return to
-+       * usermode with the kernel CR3 loaded, we'll get a page fault
-+       * instead of allowing user code to execute with the wrong CR3.
-+       *
-+       * As exceptions, we don't set NX if:
-+       *  - _PAGE_USER is not set.  This could be an executable
-+       *     EFI runtime mapping or something similar, and the kernel
-+       *     may execute from it
-+       *  - we don't have NX support
-+       *  - we're clearing the PGD (i.e. the new pgd is not present).
-+       */
-+      if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
-+          (__supported_pte_mask & _PAGE_NX))
-+              pgd.pgd |= _PAGE_NX;
-+
-+      /* return the copy of the PGD we want the kernel to use: */
-+      return pgd;
-+}
-+
- /*
-  * Initialize kernel page table isolation
-  */
--- 
-2.14.2
-
diff --git a/patches/kernel/0194-x86-pti-Add-the-pti-cmdline-option-and-documentation.patch b/patches/kernel/0194-x86-pti-Add-the-pti-cmdline-option-and-documentation.patch

new file mode 100644 (file)

index 0000000..e5b25e7
--- /dev/null
+++ b/patches/kernel/0194-x86-pti-Add-the-pti-cmdline-option-and-documentation.patch
@@ -0,0 +1,122 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Tue, 12 Dec 2017 14:39:52 +0100
+Subject: [PATCH] x86/pti: Add the pti= cmdline option and documentation
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Keep the "nopti" option for traditional reasons.
+
+[ tglx: Don't allow force on when running on XEN PV and made 'on'
+       printout conditional ]
+
+Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Andy Lutomirsky <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171212133952.10177-1-bp@alien8.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 41f4c20b57a4890ea7f56ff8717cc83fefb8d537)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 96d3670fa8f88989fb7c0be5172a1378143f3296)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/admin-guide/kernel-parameters.txt |  6 ++++++
+ arch/x86/mm/pti.c                               | 26 ++++++++++++++++++++++++-
+ 2 files changed, 31 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index e2a4608da5d2..b4d2edf316db 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -3247,6 +3247,12 @@
+       pt.             [PARIDE]
+                       See Documentation/blockdev/paride.txt.
+ 
++      pti=            [X86_64]
++                      Control user/kernel address space isolation:
++                      on - enable
++                      off - disable
++                      auto - default setting
++
+       pty.legacy_count=
+                       [KNL] Number of legacy pty's. Overwrites compiled-in
+                       default number.
+diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
+index 375f23a758bc..a13f6b109865 100644
+--- a/arch/x86/mm/pti.c
++++ b/arch/x86/mm/pti.c
+@@ -54,21 +54,45 @@ static void __init pti_print_if_insecure(const char *reason)
+               pr_info("%s\n", reason);
+ }
+ 
++static void __init pti_print_if_secure(const char *reason)
++{
++      if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
++              pr_info("%s\n", reason);
++}
++
+ void __init pti_check_boottime_disable(void)
+ {
++      char arg[5];
++      int ret;
++
+       if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
+               pti_print_if_insecure("disabled on XEN PV.");
+               return;
+       }
+ 
++      ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
++      if (ret > 0)  {
++              if (ret == 3 && !strncmp(arg, "off", 3)) {
++                      pti_print_if_insecure("disabled on command line.");
++                      return;
++              }
++              if (ret == 2 && !strncmp(arg, "on", 2)) {
++                      pti_print_if_secure("force enabled on command line.");
++                      goto enable;
++              }
++              if (ret == 4 && !strncmp(arg, "auto", 4))
++                      goto autosel;
++      }
++
+       if (cmdline_find_option_bool(boot_command_line, "nopti")) {
+               pti_print_if_insecure("disabled on command line.");
+               return;
+       }
+ 
++autosel:
+       if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+               return;
+-
++enable:
+       setup_force_cpu_cap(X86_FEATURE_PTI);
+ }
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0195-x86-mm-pti-Add-mapping-helper-functions.patch b/patches/kernel/0195-x86-mm-pti-Add-mapping-helper-functions.patch

new file mode 100644 (file)

index 0000000..ee78a97
--- /dev/null
+++ b/patches/kernel/0195-x86-mm-pti-Add-mapping-helper-functions.patch
@@ -0,0 +1,235 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:37 +0100
+Subject: [PATCH] x86/mm/pti: Add mapping helper functions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Add the pagetable helper functions to manage the separate user space page
+tables.
+
+[ tglx: Split out from the big combo kaiser patch. Folded Andys
+       simplification and made it out of line as Boris suggested ]
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 61e9b3671007a5da8127955a1a3bda7e0d5f42e8)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit fb45c59197f3134db6e223bb4fec0529774c07e1)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/pgtable.h    |  6 ++-
+ arch/x86/include/asm/pgtable_64.h | 92 +++++++++++++++++++++++++++++++++++++++
+ arch/x86/mm/pti.c                 | 41 +++++++++++++++++
+ 3 files changed, 138 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
+index bb8e9ea7deb4..abbb47c75467 100644
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -894,7 +894,11 @@ static inline int pgd_none(pgd_t pgd)
+  * pgd_offset() returns a (pgd_t *)
+  * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
+  */
+-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
++#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
++/*
++ * a shortcut to get a pgd_t in a given mm
++ */
++#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
+ /*
+  * a shortcut which implies the use of the kernel's pgd, instead
+  * of a process's
+diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
+index 2160c1fee920..1ac15b03cf30 100644
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -130,9 +130,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
+ #endif
+ }
+ 
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++/*
++ * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
++ * (8k-aligned and 8k in size).  The kernel one is at the beginning 4k and
++ * the user one is in the last 4k.  To switch between them, you
++ * just need to flip bit 12 (PAGE_SHIFT) in their addresses.
++ */
++#define PTI_PGTABLE_SWITCH_BIT        PAGE_SHIFT
++
++/*
++ * This generates better code than the inline assembly in
++ * __set_bit().
++ */
++static inline void *ptr_set_bit(void *ptr, int bit)
++{
++      unsigned long __ptr = (unsigned long)ptr;
++
++      __ptr |= BIT(bit);
++      return (void *)__ptr;
++}
++static inline void *ptr_clear_bit(void *ptr, int bit)
++{
++      unsigned long __ptr = (unsigned long)ptr;
++
++      __ptr &= ~BIT(bit);
++      return (void *)__ptr;
++}
++
++static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
++{
++      return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
++}
++
++static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
++{
++      return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
++}
++
++static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
++{
++      return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
++}
++
++static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
++{
++      return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
++}
++#endif /* CONFIG_PAGE_TABLE_ISOLATION */
++
++/*
++ * Page table pages are page-aligned.  The lower half of the top
++ * level is used for userspace and the top half for the kernel.
++ *
++ * Returns true for parts of the PGD that map userspace and
++ * false for the parts that map the kernel.
++ */
++static inline bool pgdp_maps_userspace(void *__ptr)
++{
++      unsigned long ptr = (unsigned long)__ptr;
++
++      return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
++}
++
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
++
++/*
++ * Take a PGD location (pgdp) and a pgd value that needs to be set there.
++ * Populates the user copy and returns the resulting PGD that must be set in
++ * the kernel copy of the page tables.
++ */
++static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
++{
++      if (!static_cpu_has(X86_FEATURE_PTI))
++              return pgd;
++      return __pti_set_user_pgd(pgdp, pgd);
++}
++#else
++static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
++{
++      return pgd;
++}
++#endif
++
+ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
+ {
++#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
++      p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
++#else
+       *p4dp = p4d;
++#endif
+ }
+ 
+ static inline void native_p4d_clear(p4d_t *p4d)
+@@ -146,7 +234,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
+ 
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++      *pgdp = pti_set_user_pgd(pgdp, pgd);
++#else
+       *pgdp = pgd;
++#endif
+ }
+ 
+ static inline void native_pgd_clear(pgd_t *pgd)
+diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
+index a13f6b109865..69a983365392 100644
+--- a/arch/x86/mm/pti.c
++++ b/arch/x86/mm/pti.c
+@@ -96,6 +96,47 @@ void __init pti_check_boottime_disable(void)
+       setup_force_cpu_cap(X86_FEATURE_PTI);
+ }
+ 
++pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
++{
++      /*
++       * Changes to the high (kernel) portion of the kernelmode page
++       * tables are not automatically propagated to the usermode tables.
++       *
++       * Users should keep in mind that, unlike the kernelmode tables,
++       * there is no vmalloc_fault equivalent for the usermode tables.
++       * Top-level entries added to init_mm's usermode pgd after boot
++       * will not be automatically propagated to other mms.
++       */
++      if (!pgdp_maps_userspace(pgdp))
++              return pgd;
++
++      /*
++       * The user page tables get the full PGD, accessible from
++       * userspace:
++       */
++      kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
++
++      /*
++       * If this is normal user memory, make it NX in the kernel
++       * pagetables so that, if we somehow screw up and return to
++       * usermode with the kernel CR3 loaded, we'll get a page fault
++       * instead of allowing user code to execute with the wrong CR3.
++       *
++       * As exceptions, we don't set NX if:
++       *  - _PAGE_USER is not set.  This could be an executable
++       *     EFI runtime mapping or something similar, and the kernel
++       *     may execute from it
++       *  - we don't have NX support
++       *  - we're clearing the PGD (i.e. the new pgd is not present).
++       */
++      if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
++          (__supported_pte_mask & _PAGE_NX))
++              pgd.pgd |= _PAGE_NX;
++
++      /* return the copy of the PGD we want the kernel to use: */
++      return pgd;
++}
++
+ /*
+  * Initialize kernel page table isolation
+  */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0195-x86-mm-pti-Allow-NX-poison-to-be-set-in-p4d-pgd.patch b/patches/kernel/0195-x86-mm-pti-Allow-NX-poison-to-be-set-in-p4d-pgd.patch

deleted file mode 100644 (file)

index 7060437..0000000
--- a/patches/kernel/0195-x86-mm-pti-Allow-NX-poison-to-be-set-in-p4d-pgd.patch
+++ /dev/null
@@ -1,84 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Mon, 4 Dec 2017 15:07:38 +0100
-Subject: [PATCH] x86/mm/pti: Allow NX poison to be set in p4d/pgd
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-With PAGE_TABLE_ISOLATION the user portion of the kernel page tables is
-poisoned with the NX bit so if the entry code exits with the kernel page
-tables selected in CR3, userspace crashes.
-
-But doing so trips the p4d/pgd_bad() checks.  Make sure it does not do
-that.
-
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-kernel@vger.kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 1c4de1ff4fe50453b968579ee86fac3da80dd783)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 889a8bd0e57e39e7ce337e87c55fa59c09644d4e)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/pgtable.h | 14 ++++++++++++--
- 1 file changed, 12 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
-index abbb47c75467..3ef8415b2358 100644
---- a/arch/x86/include/asm/pgtable.h
-+++ b/arch/x86/include/asm/pgtable.h
-@@ -831,7 +831,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
- 
- static inline int p4d_bad(p4d_t p4d)
- {
--      return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
-+      unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
-+
-+      if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
-+              ignore_flags |= _PAGE_NX;
-+
-+      return (p4d_flags(p4d) & ~ignore_flags) != 0;
- }
- #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
- 
-@@ -865,7 +870,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
- 
- static inline int pgd_bad(pgd_t pgd)
- {
--      return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
-+      unsigned long ignore_flags = _PAGE_USER;
-+
-+      if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
-+              ignore_flags |= _PAGE_NX;
-+
-+      return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
- }
- 
- static inline int pgd_none(pgd_t pgd)
--- 
-2.14.2
-
diff --git a/patches/kernel/0196-x86-mm-pti-Allocate-a-separate-user-PGD.patch b/patches/kernel/0196-x86-mm-pti-Allocate-a-separate-user-PGD.patch

deleted file mode 100644 (file)

index f2a2ce0..0000000
--- a/patches/kernel/0196-x86-mm-pti-Allocate-a-separate-user-PGD.patch
+++ /dev/null
@@ -1,199 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Mon, 4 Dec 2017 15:07:39 +0100
-Subject: [PATCH] x86/mm/pti: Allocate a separate user PGD
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Kernel page table isolation requires to have two PGDs. One for the kernel,
-which contains the full kernel mapping plus the user space mapping and one
-for user space which contains the user space mappings and the minimal set
-of kernel mappings which are required by the architecture to be able to
-transition from and to user space.
-
-Add the necessary preliminaries.
-
-[ tglx: Split out from the big kaiser dump. EFI fixup from Kirill ]
-
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit d9e9a6418065bb376e5de8d93ce346939b9a37a6)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 0bd4b34e330d8bedf90c0497dfcef2e2286c4367)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/pgalloc.h | 11 +++++++++++
- arch/x86/mm/pgtable.c          |  5 +++--
- arch/x86/platform/efi/efi_64.c |  5 ++++-
- arch/x86/kernel/head_64.S      | 30 +++++++++++++++++++++++++++---
- 4 files changed, 45 insertions(+), 6 deletions(-)
-
-diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
-index b2d0cd8288aa..d65b0dee7448 100644
---- a/arch/x86/include/asm/pgalloc.h
-+++ b/arch/x86/include/asm/pgalloc.h
-@@ -29,6 +29,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
-  */
- extern gfp_t __userpte_alloc_gfp;
- 
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+/*
-+ * Instead of one PGD, we acquire two PGDs.  Being order-1, it is
-+ * both 8k in size and 8k-aligned.  That lets us just flip bit 12
-+ * in a pointer to swap between the two 4k halves.
-+ */
-+#define PGD_ALLOCATION_ORDER 1
-+#else
-+#define PGD_ALLOCATION_ORDER 0
-+#endif
-+
- /*
-  * Allocate and free page tables.
-  */
-diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
-index 942391b5b639..90d1d8f49cf6 100644
---- a/arch/x86/mm/pgtable.c
-+++ b/arch/x86/mm/pgtable.c
-@@ -354,14 +354,15 @@ static inline void _pgd_free(pgd_t *pgd)
-               kmem_cache_free(pgd_cache, pgd);
- }
- #else
-+
- static inline pgd_t *_pgd_alloc(void)
- {
--      return (pgd_t *)__get_free_page(PGALLOC_GFP);
-+      return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
- }
- 
- static inline void _pgd_free(pgd_t *pgd)
- {
--      free_page((unsigned long)pgd);
-+      free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
- }
- #endif /* CONFIG_X86_PAE */
- 
-diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
-index 9bf72f5bfedb..b104224d3d6c 100644
---- a/arch/x86/platform/efi/efi_64.c
-+++ b/arch/x86/platform/efi/efi_64.c
-@@ -194,6 +194,9 @@ static pgd_t *efi_pgd;
-  * because we want to avoid inserting EFI region mappings (EFI_VA_END
-  * to EFI_VA_START) into the standard kernel page tables. Everything
-  * else can be shared, see efi_sync_low_kernel_mappings().
-+ *
-+ * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
-+ * allocation.
-  */
- int __init efi_alloc_page_tables(void)
- {
-@@ -206,7 +209,7 @@ int __init efi_alloc_page_tables(void)
-               return 0;
- 
-       gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
--      efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
-+      efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
-       if (!efi_pgd)
-               return -ENOMEM;
- 
-diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
-index e785734980ad..eeaaaab54b2a 100644
---- a/arch/x86/kernel/head_64.S
-+++ b/arch/x86/kernel/head_64.S
-@@ -324,6 +324,27 @@ GLOBAL(early_recursion_flag)
-       .balign PAGE_SIZE; \
- GLOBAL(name)
- 
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+/*
-+ * Each PGD needs to be 8k long and 8k aligned.  We do not
-+ * ever go out to userspace with these, so we do not
-+ * strictly *need* the second page, but this allows us to
-+ * have a single set_pgd() implementation that does not
-+ * need to worry about whether it has 4k or 8k to work
-+ * with.
-+ *
-+ * This ensures PGDs are 8k long:
-+ */
-+#define PTI_USER_PGD_FILL     512
-+/* This ensures they are 8k-aligned: */
-+#define NEXT_PGD_PAGE(name) \
-+      .balign 2 * PAGE_SIZE; \
-+GLOBAL(name)
-+#else
-+#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
-+#define PTI_USER_PGD_FILL     0
-+#endif
-+
- /* Automate the creation of 1 to 1 mapping pmd entries */
- #define PMDS(START, PERM, COUNT)                      \
-       i = 0 ;                                         \
-@@ -333,13 +354,14 @@ GLOBAL(name)
-       .endr
- 
-       __INITDATA
--NEXT_PAGE(early_top_pgt)
-+NEXT_PGD_PAGE(early_top_pgt)
-       .fill   511,8,0
- #ifdef CONFIG_X86_5LEVEL
-       .quad   level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
- #else
-       .quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
- #endif
-+      .fill   PTI_USER_PGD_FILL,8,0
- 
- NEXT_PAGE(early_dynamic_pgts)
-       .fill   512*EARLY_DYNAMIC_PAGE_TABLES,8,0
-@@ -347,13 +369,14 @@ NEXT_PAGE(early_dynamic_pgts)
-       .data
- 
- #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
--NEXT_PAGE(init_top_pgt)
-+NEXT_PGD_PAGE(init_top_pgt)
-       .quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
-       .org    init_top_pgt + PGD_PAGE_OFFSET*8, 0
-       .quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
-       .org    init_top_pgt + PGD_START_KERNEL*8, 0
-       /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
-       .quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
-+      .fill   PTI_USER_PGD_FILL,8,0
- 
- NEXT_PAGE(level3_ident_pgt)
-       .quad   level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
-@@ -364,8 +387,9 @@ NEXT_PAGE(level2_ident_pgt)
-        */
-       PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
- #else
--NEXT_PAGE(init_top_pgt)
-+NEXT_PGD_PAGE(init_top_pgt)
-       .fill   512,8,0
-+      .fill   PTI_USER_PGD_FILL,8,0
- #endif
- 
- #ifdef CONFIG_X86_5LEVEL
--- 
-2.14.2
-
diff --git a/patches/kernel/0196-x86-mm-pti-Allow-NX-poison-to-be-set-in-p4d-pgd.patch b/patches/kernel/0196-x86-mm-pti-Allow-NX-poison-to-be-set-in-p4d-pgd.patch

new file mode 100644 (file)

index 0000000..7060437
--- /dev/null
+++ b/patches/kernel/0196-x86-mm-pti-Allow-NX-poison-to-be-set-in-p4d-pgd.patch
@@ -0,0 +1,84 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:38 +0100
+Subject: [PATCH] x86/mm/pti: Allow NX poison to be set in p4d/pgd
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+With PAGE_TABLE_ISOLATION the user portion of the kernel page tables is
+poisoned with the NX bit so if the entry code exits with the kernel page
+tables selected in CR3, userspace crashes.
+
+But doing so trips the p4d/pgd_bad() checks.  Make sure it does not do
+that.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 1c4de1ff4fe50453b968579ee86fac3da80dd783)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 889a8bd0e57e39e7ce337e87c55fa59c09644d4e)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/pgtable.h | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
+index abbb47c75467..3ef8415b2358 100644
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -831,7 +831,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+ 
+ static inline int p4d_bad(p4d_t p4d)
+ {
+-      return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
++      unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
++
++      if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
++              ignore_flags |= _PAGE_NX;
++
++      return (p4d_flags(p4d) & ~ignore_flags) != 0;
+ }
+ #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
+ 
+@@ -865,7 +870,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+ 
+ static inline int pgd_bad(pgd_t pgd)
+ {
+-      return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
++      unsigned long ignore_flags = _PAGE_USER;
++
++      if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
++              ignore_flags |= _PAGE_NX;
++
++      return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
+ }
+ 
+ static inline int pgd_none(pgd_t pgd)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0197-x86-mm-pti-Allocate-a-separate-user-PGD.patch b/patches/kernel/0197-x86-mm-pti-Allocate-a-separate-user-PGD.patch

new file mode 100644 (file)

index 0000000..f2a2ce0
--- /dev/null
+++ b/patches/kernel/0197-x86-mm-pti-Allocate-a-separate-user-PGD.patch
@@ -0,0 +1,199 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:39 +0100
+Subject: [PATCH] x86/mm/pti: Allocate a separate user PGD
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Kernel page table isolation requires to have two PGDs. One for the kernel,
+which contains the full kernel mapping plus the user space mapping and one
+for user space which contains the user space mappings and the minimal set
+of kernel mappings which are required by the architecture to be able to
+transition from and to user space.
+
+Add the necessary preliminaries.
+
+[ tglx: Split out from the big kaiser dump. EFI fixup from Kirill ]
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit d9e9a6418065bb376e5de8d93ce346939b9a37a6)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 0bd4b34e330d8bedf90c0497dfcef2e2286c4367)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/pgalloc.h | 11 +++++++++++
+ arch/x86/mm/pgtable.c          |  5 +++--
+ arch/x86/platform/efi/efi_64.c |  5 ++++-
+ arch/x86/kernel/head_64.S      | 30 +++++++++++++++++++++++++++---
+ 4 files changed, 45 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
+index b2d0cd8288aa..d65b0dee7448 100644
+--- a/arch/x86/include/asm/pgalloc.h
++++ b/arch/x86/include/asm/pgalloc.h
+@@ -29,6 +29,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
+  */
+ extern gfp_t __userpte_alloc_gfp;
+ 
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++/*
++ * Instead of one PGD, we acquire two PGDs.  Being order-1, it is
++ * both 8k in size and 8k-aligned.  That lets us just flip bit 12
++ * in a pointer to swap between the two 4k halves.
++ */
++#define PGD_ALLOCATION_ORDER 1
++#else
++#define PGD_ALLOCATION_ORDER 0
++#endif
++
+ /*
+  * Allocate and free page tables.
+  */
+diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
+index 942391b5b639..90d1d8f49cf6 100644
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -354,14 +354,15 @@ static inline void _pgd_free(pgd_t *pgd)
+               kmem_cache_free(pgd_cache, pgd);
+ }
+ #else
++
+ static inline pgd_t *_pgd_alloc(void)
+ {
+-      return (pgd_t *)__get_free_page(PGALLOC_GFP);
++      return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
+ }
+ 
+ static inline void _pgd_free(pgd_t *pgd)
+ {
+-      free_page((unsigned long)pgd);
++      free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
+ }
+ #endif /* CONFIG_X86_PAE */
+ 
+diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
+index 9bf72f5bfedb..b104224d3d6c 100644
+--- a/arch/x86/platform/efi/efi_64.c
++++ b/arch/x86/platform/efi/efi_64.c
+@@ -194,6 +194,9 @@ static pgd_t *efi_pgd;
+  * because we want to avoid inserting EFI region mappings (EFI_VA_END
+  * to EFI_VA_START) into the standard kernel page tables. Everything
+  * else can be shared, see efi_sync_low_kernel_mappings().
++ *
++ * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
++ * allocation.
+  */
+ int __init efi_alloc_page_tables(void)
+ {
+@@ -206,7 +209,7 @@ int __init efi_alloc_page_tables(void)
+               return 0;
+ 
+       gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
+-      efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
++      efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
+       if (!efi_pgd)
+               return -ENOMEM;
+ 
+diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
+index e785734980ad..eeaaaab54b2a 100644
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -324,6 +324,27 @@ GLOBAL(early_recursion_flag)
+       .balign PAGE_SIZE; \
+ GLOBAL(name)
+ 
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++/*
++ * Each PGD needs to be 8k long and 8k aligned.  We do not
++ * ever go out to userspace with these, so we do not
++ * strictly *need* the second page, but this allows us to
++ * have a single set_pgd() implementation that does not
++ * need to worry about whether it has 4k or 8k to work
++ * with.
++ *
++ * This ensures PGDs are 8k long:
++ */
++#define PTI_USER_PGD_FILL     512
++/* This ensures they are 8k-aligned: */
++#define NEXT_PGD_PAGE(name) \
++      .balign 2 * PAGE_SIZE; \
++GLOBAL(name)
++#else
++#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
++#define PTI_USER_PGD_FILL     0
++#endif
++
+ /* Automate the creation of 1 to 1 mapping pmd entries */
+ #define PMDS(START, PERM, COUNT)                      \
+       i = 0 ;                                         \
+@@ -333,13 +354,14 @@ GLOBAL(name)
+       .endr
+ 
+       __INITDATA
+-NEXT_PAGE(early_top_pgt)
++NEXT_PGD_PAGE(early_top_pgt)
+       .fill   511,8,0
+ #ifdef CONFIG_X86_5LEVEL
+       .quad   level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+ #else
+       .quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+ #endif
++      .fill   PTI_USER_PGD_FILL,8,0
+ 
+ NEXT_PAGE(early_dynamic_pgts)
+       .fill   512*EARLY_DYNAMIC_PAGE_TABLES,8,0
+@@ -347,13 +369,14 @@ NEXT_PAGE(early_dynamic_pgts)
+       .data
+ 
+ #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
+-NEXT_PAGE(init_top_pgt)
++NEXT_PGD_PAGE(init_top_pgt)
+       .quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+       .org    init_top_pgt + PGD_PAGE_OFFSET*8, 0
+       .quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+       .org    init_top_pgt + PGD_START_KERNEL*8, 0
+       /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+       .quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
++      .fill   PTI_USER_PGD_FILL,8,0
+ 
+ NEXT_PAGE(level3_ident_pgt)
+       .quad   level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+@@ -364,8 +387,9 @@ NEXT_PAGE(level2_ident_pgt)
+        */
+       PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+ #else
+-NEXT_PAGE(init_top_pgt)
++NEXT_PGD_PAGE(init_top_pgt)
+       .fill   512,8,0
++      .fill   PTI_USER_PGD_FILL,8,0
+ #endif
+ 
+ #ifdef CONFIG_X86_5LEVEL
+-- 
+2.14.2
+
diff --git a/patches/kernel/0197-x86-mm-pti-Populate-user-PGD.patch b/patches/kernel/0197-x86-mm-pti-Populate-user-PGD.patch

deleted file mode 100644 (file)

index 09d7bc8..0000000
--- a/patches/kernel/0197-x86-mm-pti-Populate-user-PGD.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Mon, 4 Dec 2017 15:07:40 +0100
-Subject: [PATCH] x86/mm/pti: Populate user PGD
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-In clone_pgd_range() copy the init user PGDs which cover the kernel half of
-the address space, so a process has all the required kernel mappings
-visible.
-
-[ tglx: Split out from the big kaiser dump and folded Andys simplification ]
-
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit fc2fbc8512ed08d1de7720936fd7d2e4ce02c3a2)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 26c08c52162e1079cbb3e9ce8e1346a100ea7ccc)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/pgtable.h | 9 ++++++++-
- 1 file changed, 8 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
-index 3ef8415b2358..25604b8a251a 100644
---- a/arch/x86/include/asm/pgtable.h
-+++ b/arch/x86/include/asm/pgtable.h
-@@ -1104,7 +1104,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
-  */
- static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
- {
--       memcpy(dst, src, count * sizeof(pgd_t));
-+      memcpy(dst, src, count * sizeof(pgd_t));
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+      if (!static_cpu_has(X86_FEATURE_PTI))
-+              return;
-+      /* Clone the user space pgd as well */
-+      memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
-+             count * sizeof(pgd_t));
-+#endif
- }
- 
- #define PTE_SHIFT ilog2(PTRS_PER_PTE)
--- 
-2.14.2
-
diff --git a/patches/kernel/0198-x86-mm-pti-Add-functions-to-clone-kernel-PMDs.patch b/patches/kernel/0198-x86-mm-pti-Add-functions-to-clone-kernel-PMDs.patch

deleted file mode 100644 (file)

index dc510fd..0000000
--- a/patches/kernel/0198-x86-mm-pti-Add-functions-to-clone-kernel-PMDs.patch
+++ /dev/null
@@ -1,204 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:42 +0100
-Subject: [PATCH] x86/mm/pti: Add functions to clone kernel PMDs
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Provide infrastructure to:
-
- - find a kernel PMD for a mapping which must be visible to user space for
-   the entry/exit code to work.
-
- - walk an address range and share the kernel PMD with it.
-
-This reuses a small part of the original KAISER patches to populate the
-user space page table.
-
-[ tglx: Made it universally usable so it can be used for any kind of shared
-       mapping. Add a mechanism to clear specific bits in the user space
-       visible PMD entry. Folded Andys simplifactions ]
-
-Originally-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 03f4424f348e8be95eb1bbeba09461cd7b867828)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 262ab7e8665e88581d20ccaefa107340457224bb)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/pti.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 127 insertions(+)
-
-diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
-index 69a983365392..d58bcee470fc 100644
---- a/arch/x86/mm/pti.c
-+++ b/arch/x86/mm/pti.c
-@@ -48,6 +48,11 @@
- #undef pr_fmt
- #define pr_fmt(fmt)     "Kernel/User page tables isolation: " fmt
- 
-+/* Backporting helper */
-+#ifndef __GFP_NOTRACK
-+#define __GFP_NOTRACK 0
-+#endif
-+
- static void __init pti_print_if_insecure(const char *reason)
- {
-       if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
-@@ -137,6 +142,128 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
-       return pgd;
- }
- 
-+/*
-+ * Walk the user copy of the page tables (optionally) trying to allocate
-+ * page table pages on the way down.
-+ *
-+ * Returns a pointer to a P4D on success, or NULL on failure.
-+ */
-+static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
-+{
-+      pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
-+      gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
-+
-+      if (address < PAGE_OFFSET) {
-+              WARN_ONCE(1, "attempt to walk user address\n");
-+              return NULL;
-+      }
-+
-+      if (pgd_none(*pgd)) {
-+              unsigned long new_p4d_page = __get_free_page(gfp);
-+              if (!new_p4d_page)
-+                      return NULL;
-+
-+              if (pgd_none(*pgd)) {
-+                      set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
-+                      new_p4d_page = 0;
-+              }
-+              if (new_p4d_page)
-+                      free_page(new_p4d_page);
-+      }
-+      BUILD_BUG_ON(pgd_large(*pgd) != 0);
-+
-+      return p4d_offset(pgd, address);
-+}
-+
-+/*
-+ * Walk the user copy of the page tables (optionally) trying to allocate
-+ * page table pages on the way down.
-+ *
-+ * Returns a pointer to a PMD on success, or NULL on failure.
-+ */
-+static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
-+{
-+      gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
-+      p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
-+      pud_t *pud;
-+
-+      BUILD_BUG_ON(p4d_large(*p4d) != 0);
-+      if (p4d_none(*p4d)) {
-+              unsigned long new_pud_page = __get_free_page(gfp);
-+              if (!new_pud_page)
-+                      return NULL;
-+
-+              if (p4d_none(*p4d)) {
-+                      set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
-+                      new_pud_page = 0;
-+              }
-+              if (new_pud_page)
-+                      free_page(new_pud_page);
-+      }
-+
-+      pud = pud_offset(p4d, address);
-+      /* The user page tables do not use large mappings: */
-+      if (pud_large(*pud)) {
-+              WARN_ON(1);
-+              return NULL;
-+      }
-+      if (pud_none(*pud)) {
-+              unsigned long new_pmd_page = __get_free_page(gfp);
-+              if (!new_pmd_page)
-+                      return NULL;
-+
-+              if (pud_none(*pud)) {
-+                      set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
-+                      new_pmd_page = 0;
-+              }
-+              if (new_pmd_page)
-+                      free_page(new_pmd_page);
-+      }
-+
-+      return pmd_offset(pud, address);
-+}
-+
-+static void __init
-+pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
-+{
-+      unsigned long addr;
-+
-+      /*
-+       * Clone the populated PMDs which cover start to end. These PMD areas
-+       * can have holes.
-+       */
-+      for (addr = start; addr < end; addr += PMD_SIZE) {
-+              pmd_t *pmd, *target_pmd;
-+              pgd_t *pgd;
-+              p4d_t *p4d;
-+              pud_t *pud;
-+
-+              pgd = pgd_offset_k(addr);
-+              if (WARN_ON(pgd_none(*pgd)))
-+                      return;
-+              p4d = p4d_offset(pgd, addr);
-+              if (WARN_ON(p4d_none(*p4d)))
-+                      return;
-+              pud = pud_offset(p4d, addr);
-+              if (pud_none(*pud))
-+                      continue;
-+              pmd = pmd_offset(pud, addr);
-+              if (pmd_none(*pmd))
-+                      continue;
-+
-+              target_pmd = pti_user_pagetable_walk_pmd(addr);
-+              if (WARN_ON(!target_pmd))
-+                      return;
-+
-+              /*
-+               * Copy the PMD.  That is, the kernelmode and usermode
-+               * tables will share the last-level page tables of this
-+               * address range
-+               */
-+              *target_pmd = pmd_clear_flags(*pmd, clear);
-+      }
-+}
-+
- /*
-  * Initialize kernel page table isolation
-  */
--- 
-2.14.2
-
diff --git a/patches/kernel/0198-x86-mm-pti-Populate-user-PGD.patch b/patches/kernel/0198-x86-mm-pti-Populate-user-PGD.patch

new file mode 100644 (file)

index 0000000..09d7bc8
--- /dev/null
+++ b/patches/kernel/0198-x86-mm-pti-Populate-user-PGD.patch
@@ -0,0 +1,70 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:40 +0100
+Subject: [PATCH] x86/mm/pti: Populate user PGD
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+In clone_pgd_range() copy the init user PGDs which cover the kernel half of
+the address space, so a process has all the required kernel mappings
+visible.
+
+[ tglx: Split out from the big kaiser dump and folded Andys simplification ]
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit fc2fbc8512ed08d1de7720936fd7d2e4ce02c3a2)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 26c08c52162e1079cbb3e9ce8e1346a100ea7ccc)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/pgtable.h | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
+index 3ef8415b2358..25604b8a251a 100644
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -1104,7 +1104,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+  */
+ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+ {
+-       memcpy(dst, src, count * sizeof(pgd_t));
++      memcpy(dst, src, count * sizeof(pgd_t));
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++      if (!static_cpu_has(X86_FEATURE_PTI))
++              return;
++      /* Clone the user space pgd as well */
++      memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
++             count * sizeof(pgd_t));
++#endif
+ }
+ 
+ #define PTE_SHIFT ilog2(PTRS_PER_PTE)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0199-x86-mm-pti-Add-functions-to-clone-kernel-PMDs.patch b/patches/kernel/0199-x86-mm-pti-Add-functions-to-clone-kernel-PMDs.patch

new file mode 100644 (file)

index 0000000..dc510fd
--- /dev/null
+++ b/patches/kernel/0199-x86-mm-pti-Add-functions-to-clone-kernel-PMDs.patch
@@ -0,0 +1,204 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:42 +0100
+Subject: [PATCH] x86/mm/pti: Add functions to clone kernel PMDs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Provide infrastructure to:
+
+ - find a kernel PMD for a mapping which must be visible to user space for
+   the entry/exit code to work.
+
+ - walk an address range and share the kernel PMD with it.
+
+This reuses a small part of the original KAISER patches to populate the
+user space page table.
+
+[ tglx: Made it universally usable so it can be used for any kind of shared
+       mapping. Add a mechanism to clear specific bits in the user space
+       visible PMD entry. Folded Andy's simplifications ]
+
+Originally-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 03f4424f348e8be95eb1bbeba09461cd7b867828)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 262ab7e8665e88581d20ccaefa107340457224bb)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/pti.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 127 insertions(+)
+
+diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
+index 69a983365392..d58bcee470fc 100644
+--- a/arch/x86/mm/pti.c
++++ b/arch/x86/mm/pti.c
+@@ -48,6 +48,11 @@
+ #undef pr_fmt
+ #define pr_fmt(fmt)     "Kernel/User page tables isolation: " fmt
+ 
++/* Backporting helper */
++#ifndef __GFP_NOTRACK
++#define __GFP_NOTRACK 0
++#endif
++
+ static void __init pti_print_if_insecure(const char *reason)
+ {
+       if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+@@ -137,6 +142,128 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+       return pgd;
+ }
+ 
++/*
++ * Walk the user copy of the page tables (optionally) trying to allocate
++ * page table pages on the way down.
++ *
++ * Returns a pointer to a P4D on success, or NULL on failure.
++ */
++static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
++{
++      pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
++      gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
++
++      if (address < PAGE_OFFSET) {
++              WARN_ONCE(1, "attempt to walk user address\n");
++              return NULL;
++      }
++
++      if (pgd_none(*pgd)) {
++              unsigned long new_p4d_page = __get_free_page(gfp);
++              if (!new_p4d_page)
++                      return NULL;
++
++              if (pgd_none(*pgd)) {
++                      set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
++                      new_p4d_page = 0;
++              }
++              if (new_p4d_page)
++                      free_page(new_p4d_page);
++      }
++      BUILD_BUG_ON(pgd_large(*pgd) != 0);
++
++      return p4d_offset(pgd, address);
++}
++
++/*
++ * Walk the user copy of the page tables (optionally) trying to allocate
++ * page table pages on the way down.
++ *
++ * Returns a pointer to a PMD on success, or NULL on failure.
++ */
++static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
++{
++      gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
++      p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
++      pud_t *pud;
++
++      BUILD_BUG_ON(p4d_large(*p4d) != 0);
++      if (p4d_none(*p4d)) {
++              unsigned long new_pud_page = __get_free_page(gfp);
++              if (!new_pud_page)
++                      return NULL;
++
++              if (p4d_none(*p4d)) {
++                      set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
++                      new_pud_page = 0;
++              }
++              if (new_pud_page)
++                      free_page(new_pud_page);
++      }
++
++      pud = pud_offset(p4d, address);
++      /* The user page tables do not use large mappings: */
++      if (pud_large(*pud)) {
++              WARN_ON(1);
++              return NULL;
++      }
++      if (pud_none(*pud)) {
++              unsigned long new_pmd_page = __get_free_page(gfp);
++              if (!new_pmd_page)
++                      return NULL;
++
++              if (pud_none(*pud)) {
++                      set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
++                      new_pmd_page = 0;
++              }
++              if (new_pmd_page)
++                      free_page(new_pmd_page);
++      }
++
++      return pmd_offset(pud, address);
++}
++
++static void __init
++pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
++{
++      unsigned long addr;
++
++      /*
++       * Clone the populated PMDs which cover start to end. These PMD areas
++       * can have holes.
++       */
++      for (addr = start; addr < end; addr += PMD_SIZE) {
++              pmd_t *pmd, *target_pmd;
++              pgd_t *pgd;
++              p4d_t *p4d;
++              pud_t *pud;
++
++              pgd = pgd_offset_k(addr);
++              if (WARN_ON(pgd_none(*pgd)))
++                      return;
++              p4d = p4d_offset(pgd, addr);
++              if (WARN_ON(p4d_none(*p4d)))
++                      return;
++              pud = pud_offset(p4d, addr);
++              if (pud_none(*pud))
++                      continue;
++              pmd = pmd_offset(pud, addr);
++              if (pmd_none(*pmd))
++                      continue;
++
++              target_pmd = pti_user_pagetable_walk_pmd(addr);
++              if (WARN_ON(!target_pmd))
++                      return;
++
++              /*
++               * Copy the PMD.  That is, the kernelmode and usermode
++               * tables will share the last-level page tables of this
++               * address range
++               */
++              *target_pmd = pmd_clear_flags(*pmd, clear);
++      }
++}
++
+ /*
+  * Initialize kernel page table isolation
+  */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0199-x86-mm-pti-Force-entry-through-trampoline-when-PTI-a.patch b/patches/kernel/0199-x86-mm-pti-Force-entry-through-trampoline-when-PTI-a.patch

deleted file mode 100644 (file)

index c0d2a1c..0000000
--- a/patches/kernel/0199-x86-mm-pti-Force-entry-through-trampoline-when-PTI-a.patch
+++ /dev/null
@@ -1,63 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon, 4 Dec 2017 15:07:43 +0100
-Subject: [PATCH] x86/mm/pti: Force entry through trampoline when PTI active
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Force the entry through the trampoline only when PTI is active. Otherwise
-go through the normal entry code.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 8d4b067895791ab9fdb1aadfc505f64d71239dd2)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 9ae1ea4821648be179a96fe65b3ed4bd111a5c98)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/cpu/common.c | 5 ++++-
- 1 file changed, 4 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 623ba3635793..99f37d1636ff 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -1340,7 +1340,10 @@ void syscall_init(void)
-               (entry_SYSCALL_64_trampoline - _entry_trampoline);
- 
-       wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
--      wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
-+      if (static_cpu_has(X86_FEATURE_PTI))
-+              wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
-+      else
-+              wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
- 
- #ifdef CONFIG_IA32_EMULATION
-       wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
--- 
-2.14.2
-
diff --git a/patches/kernel/0200-x86-mm-pti-Force-entry-through-trampoline-when-PTI-a.patch b/patches/kernel/0200-x86-mm-pti-Force-entry-through-trampoline-when-PTI-a.patch

new file mode 100644 (file)

index 0000000..c0d2a1c
--- /dev/null
+++ b/patches/kernel/0200-x86-mm-pti-Force-entry-through-trampoline-when-PTI-a.patch
@@ -0,0 +1,63 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:43 +0100
+Subject: [PATCH] x86/mm/pti: Force entry through trampoline when PTI active
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Force the entry through the trampoline only when PTI is active. Otherwise
+go through the normal entry code.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 8d4b067895791ab9fdb1aadfc505f64d71239dd2)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 9ae1ea4821648be179a96fe65b3ed4bd111a5c98)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/cpu/common.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 623ba3635793..99f37d1636ff 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1340,7 +1340,10 @@ void syscall_init(void)
+               (entry_SYSCALL_64_trampoline - _entry_trampoline);
+ 
+       wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
+-      wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
++      if (static_cpu_has(X86_FEATURE_PTI))
++              wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
++      else
++              wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+ 
+ #ifdef CONFIG_IA32_EMULATION
+       wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0200-x86-mm-pti-Share-cpu_entry_area-with-user-space-page.patch b/patches/kernel/0200-x86-mm-pti-Share-cpu_entry_area-with-user-space-page.patch

deleted file mode 100644 (file)

index 2decc9c..0000000
--- a/patches/kernel/0200-x86-mm-pti-Share-cpu_entry_area-with-user-space-page.patch
+++ /dev/null
@@ -1,87 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Mon, 4 Dec 2017 15:07:45 +0100
-Subject: [PATCH] x86/mm/pti: Share cpu_entry_area with user space page tables
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Share the cpu entry area so the user space and kernel space page tables
-have the same P4D page.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit f7cfbee91559ca7e3e961a00ffac921208a115ad)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 6e8142de3a6e84a82a421b66a74ba37976912282)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/pti.c | 25 +++++++++++++++++++++++++
- 1 file changed, 25 insertions(+)
-
-diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
-index d58bcee470fc..59290356f19f 100644
---- a/arch/x86/mm/pti.c
-+++ b/arch/x86/mm/pti.c
-@@ -264,6 +264,29 @@ pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
-       }
- }
- 
-+/*
-+ * Clone a single p4d (i.e. a top-level entry on 4-level systems and a
-+ * next-level entry on 5-level systems.
-+ */
-+static void __init pti_clone_p4d(unsigned long addr)
-+{
-+      p4d_t *kernel_p4d, *user_p4d;
-+      pgd_t *kernel_pgd;
-+
-+      user_p4d = pti_user_pagetable_walk_p4d(addr);
-+      kernel_pgd = pgd_offset_k(addr);
-+      kernel_p4d = p4d_offset(kernel_pgd, addr);
-+      *user_p4d = *kernel_p4d;
-+}
-+
-+/*
-+ * Clone the CPU_ENTRY_AREA into the user space visible page table.
-+ */
-+static void __init pti_clone_user_shared(void)
-+{
-+      pti_clone_p4d(CPU_ENTRY_AREA_BASE);
-+}
-+
- /*
-  * Initialize kernel page table isolation
-  */
-@@ -273,4 +296,6 @@ void __init pti_init(void)
-               return;
- 
-       pr_info("enabled\n");
-+
-+      pti_clone_user_shared();
- }
--- 
-2.14.2
-
diff --git a/patches/kernel/0201-x86-entry-Align-entry-text-section-to-PMD-boundary.patch b/patches/kernel/0201-x86-entry-Align-entry-text-section-to-PMD-boundary.patch

deleted file mode 100644 (file)

index 4b7d5a4..0000000
--- a/patches/kernel/0201-x86-entry-Align-entry-text-section-to-PMD-boundary.patch
+++ /dev/null
@@ -1,79 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon, 4 Dec 2017 15:07:46 +0100
-Subject: [PATCH] x86/entry: Align entry text section to PMD boundary
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The (irq)entry text must be visible in the user space page tables. To allow
-simple PMD based sharing, make the entry text PMD aligned.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 2f7412ba9c6af5ab16bdbb4a3fdb1dcd2b4fd3c2)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 3cf72b14b56834882ebe731d5fa84d249c56a188)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/vmlinux.lds.S | 8 ++++++++
- 1 file changed, 8 insertions(+)
-
-diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
-index 423aa36f0150..f505d8dbdccf 100644
---- a/arch/x86/kernel/vmlinux.lds.S
-+++ b/arch/x86/kernel/vmlinux.lds.S
-@@ -60,11 +60,17 @@ jiffies_64 = jiffies;
-               . = ALIGN(HPAGE_SIZE);                          \
-               __end_rodata_hpage_align = .;
- 
-+#define ALIGN_ENTRY_TEXT_BEGIN        . = ALIGN(PMD_SIZE);
-+#define ALIGN_ENTRY_TEXT_END  . = ALIGN(PMD_SIZE);
-+
- #else
- 
- #define X64_ALIGN_RODATA_BEGIN
- #define X64_ALIGN_RODATA_END
- 
-+#define ALIGN_ENTRY_TEXT_BEGIN
-+#define ALIGN_ENTRY_TEXT_END
-+
- #endif
- 
- PHDRS {
-@@ -101,8 +107,10 @@ SECTIONS
-               CPUIDLE_TEXT
-               LOCK_TEXT
-               KPROBES_TEXT
-+              ALIGN_ENTRY_TEXT_BEGIN
-               ENTRY_TEXT
-               IRQENTRY_TEXT
-+              ALIGN_ENTRY_TEXT_END
-               SOFTIRQENTRY_TEXT
-               *(.fixup)
-               *(.gnu.warning)
--- 
-2.14.2
-
diff --git a/patches/kernel/0201-x86-mm-pti-Share-cpu_entry_area-with-user-space-page.patch b/patches/kernel/0201-x86-mm-pti-Share-cpu_entry_area-with-user-space-page.patch

new file mode 100644 (file)

index 0000000..2decc9c
--- /dev/null
+++ b/patches/kernel/0201-x86-mm-pti-Share-cpu_entry_area-with-user-space-page.patch
@@ -0,0 +1,87 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 4 Dec 2017 15:07:45 +0100
+Subject: [PATCH] x86/mm/pti: Share cpu_entry_area with user space page tables
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Share the cpu entry area so the user space and kernel space page tables
+have the same P4D page.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit f7cfbee91559ca7e3e961a00ffac921208a115ad)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 6e8142de3a6e84a82a421b66a74ba37976912282)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/pti.c | 25 +++++++++++++++++++++++++
+ 1 file changed, 25 insertions(+)
+
+diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
+index d58bcee470fc..59290356f19f 100644
+--- a/arch/x86/mm/pti.c
++++ b/arch/x86/mm/pti.c
+@@ -264,6 +264,29 @@ pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
+       }
+ }
+ 
++/*
++ * Clone a single p4d (i.e. a top-level entry on 4-level systems and a
++ * next-level entry on 5-level systems.
++ */
++static void __init pti_clone_p4d(unsigned long addr)
++{
++      p4d_t *kernel_p4d, *user_p4d;
++      pgd_t *kernel_pgd;
++
++      user_p4d = pti_user_pagetable_walk_p4d(addr);
++      kernel_pgd = pgd_offset_k(addr);
++      kernel_p4d = p4d_offset(kernel_pgd, addr);
++      *user_p4d = *kernel_p4d;
++}
++
++/*
++ * Clone the CPU_ENTRY_AREA into the user space visible page table.
++ */
++static void __init pti_clone_user_shared(void)
++{
++      pti_clone_p4d(CPU_ENTRY_AREA_BASE);
++}
++
+ /*
+  * Initialize kernel page table isolation
+  */
+@@ -273,4 +296,6 @@ void __init pti_init(void)
+               return;
+ 
+       pr_info("enabled\n");
++
++      pti_clone_user_shared();
+ }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0202-x86-entry-Align-entry-text-section-to-PMD-boundary.patch b/patches/kernel/0202-x86-entry-Align-entry-text-section-to-PMD-boundary.patch

new file mode 100644 (file)

index 0000000..4b7d5a4
--- /dev/null
+++ b/patches/kernel/0202-x86-entry-Align-entry-text-section-to-PMD-boundary.patch
@@ -0,0 +1,79 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:46 +0100
+Subject: [PATCH] x86/entry: Align entry text section to PMD boundary
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The (irq)entry text must be visible in the user space page tables. To allow
+simple PMD based sharing, make the entry text PMD aligned.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 2f7412ba9c6af5ab16bdbb4a3fdb1dcd2b4fd3c2)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 3cf72b14b56834882ebe731d5fa84d249c56a188)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/vmlinux.lds.S | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
+index 423aa36f0150..f505d8dbdccf 100644
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -60,11 +60,17 @@ jiffies_64 = jiffies;
+               . = ALIGN(HPAGE_SIZE);                          \
+               __end_rodata_hpage_align = .;
+ 
++#define ALIGN_ENTRY_TEXT_BEGIN        . = ALIGN(PMD_SIZE);
++#define ALIGN_ENTRY_TEXT_END  . = ALIGN(PMD_SIZE);
++
+ #else
+ 
+ #define X64_ALIGN_RODATA_BEGIN
+ #define X64_ALIGN_RODATA_END
+ 
++#define ALIGN_ENTRY_TEXT_BEGIN
++#define ALIGN_ENTRY_TEXT_END
++
+ #endif
+ 
+ PHDRS {
+@@ -101,8 +107,10 @@ SECTIONS
+               CPUIDLE_TEXT
+               LOCK_TEXT
+               KPROBES_TEXT
++              ALIGN_ENTRY_TEXT_BEGIN
+               ENTRY_TEXT
+               IRQENTRY_TEXT
++              ALIGN_ENTRY_TEXT_END
+               SOFTIRQENTRY_TEXT
+               *(.fixup)
+               *(.gnu.warning)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0202-x86-mm-pti-Share-entry-text-PMD.patch b/patches/kernel/0202-x86-mm-pti-Share-entry-text-PMD.patch

deleted file mode 100644 (file)

index eb6ec0d..0000000
--- a/patches/kernel/0202-x86-mm-pti-Share-entry-text-PMD.patch
+++ /dev/null
@@ -1,74 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon, 4 Dec 2017 15:07:47 +0100
-Subject: [PATCH] x86/mm/pti: Share entry text PMD
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Share the entry text PMD of the kernel mapping with the user space
-mapping. If large pages are enabled this is a single PMD entry and at the
-point where it is copied into the user page table the RW bit has not been
-cleared yet. Clear it right away so the user space visible map becomes RX.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 6dc72c3cbca0580642808d677181cad4c6433893)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit ee98d7446b4a7c12a57a38b1a5f51e3df0ac2cf3)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/pti.c | 10 ++++++++++
- 1 file changed, 10 insertions(+)
-
-diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
-index 59290356f19f..0e78797650a7 100644
---- a/arch/x86/mm/pti.c
-+++ b/arch/x86/mm/pti.c
-@@ -287,6 +287,15 @@ static void __init pti_clone_user_shared(void)
-       pti_clone_p4d(CPU_ENTRY_AREA_BASE);
- }
- 
-+/*
-+ * Clone the populated PMDs of the entry and irqentry text and force it RO.
-+ */
-+static void __init pti_clone_entry_text(void)
-+{
-+      pti_clone_pmds((unsigned long) __entry_text_start,
-+                      (unsigned long) __irqentry_text_end, _PAGE_RW);
-+}
-+
- /*
-  * Initialize kernel page table isolation
-  */
-@@ -298,4 +307,5 @@ void __init pti_init(void)
-       pr_info("enabled\n");
- 
-       pti_clone_user_shared();
-+      pti_clone_entry_text();
- }
--- 
-2.14.2
-
diff --git a/patches/kernel/0203-x86-mm-pti-Map-ESPFIX-into-user-space.patch b/patches/kernel/0203-x86-mm-pti-Map-ESPFIX-into-user-space.patch

deleted file mode 100644 (file)

index 0db3a23..0000000
--- a/patches/kernel/0203-x86-mm-pti-Map-ESPFIX-into-user-space.patch
+++ /dev/null
@@ -1,64 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Fri, 15 Dec 2017 22:08:18 +0100
-Subject: [PATCH] x86/mm/pti: Map ESPFIX into user space
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Map the ESPFIX pages into user space when PTI is enabled.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Kees Cook <keescook@chromium.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 4b6bbe95b87966ba08999574db65c93c5e925a36)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit f5103cc3035ae6d1816404696ee2eb06d53b6709)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/pti.c | 11 +++++++++++
- 1 file changed, 11 insertions(+)
-
-diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
-index 0e78797650a7..b1c38ef9fbbb 100644
---- a/arch/x86/mm/pti.c
-+++ b/arch/x86/mm/pti.c
-@@ -287,6 +287,16 @@ static void __init pti_clone_user_shared(void)
-       pti_clone_p4d(CPU_ENTRY_AREA_BASE);
- }
- 
-+/*
-+ * Clone the ESPFIX P4D into the user space visinble page table
-+ */
-+static void __init pti_setup_espfix64(void)
-+{
-+#ifdef CONFIG_X86_ESPFIX64
-+      pti_clone_p4d(ESPFIX_BASE_ADDR);
-+#endif
-+}
-+
- /*
-  * Clone the populated PMDs of the entry and irqentry text and force it RO.
-  */
-@@ -308,4 +318,5 @@ void __init pti_init(void)
- 
-       pti_clone_user_shared();
-       pti_clone_entry_text();
-+      pti_setup_espfix64();
- }
--- 
-2.14.2
-
diff --git a/patches/kernel/0203-x86-mm-pti-Share-entry-text-PMD.patch b/patches/kernel/0203-x86-mm-pti-Share-entry-text-PMD.patch

new file mode 100644 (file)

index 0000000..eb6ec0d
--- /dev/null
+++ b/patches/kernel/0203-x86-mm-pti-Share-entry-text-PMD.patch
@@ -0,0 +1,74 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:47 +0100
+Subject: [PATCH] x86/mm/pti: Share entry text PMD
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Share the entry text PMD of the kernel mapping with the user space
+mapping. If large pages are enabled this is a single PMD entry and at the
+point where it is copied into the user page table the RW bit has not been
+cleared yet. Clear it right away so the user space visible map becomes RX.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 6dc72c3cbca0580642808d677181cad4c6433893)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit ee98d7446b4a7c12a57a38b1a5f51e3df0ac2cf3)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/pti.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
+index 59290356f19f..0e78797650a7 100644
+--- a/arch/x86/mm/pti.c
++++ b/arch/x86/mm/pti.c
+@@ -287,6 +287,15 @@ static void __init pti_clone_user_shared(void)
+       pti_clone_p4d(CPU_ENTRY_AREA_BASE);
+ }
+ 
++/*
++ * Clone the populated PMDs of the entry and irqentry text and force it RO.
++ */
++static void __init pti_clone_entry_text(void)
++{
++      pti_clone_pmds((unsigned long) __entry_text_start,
++                      (unsigned long) __irqentry_text_end, _PAGE_RW);
++}
++
+ /*
+  * Initialize kernel page table isolation
+  */
+@@ -298,4 +307,5 @@ void __init pti_init(void)
+       pr_info("enabled\n");
+ 
+       pti_clone_user_shared();
++      pti_clone_entry_text();
+ }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0204-x86-cpu_entry_area-Add-debugstore-entries-to-cpu_ent.patch b/patches/kernel/0204-x86-cpu_entry_area-Add-debugstore-entries-to-cpu_ent.patch

deleted file mode 100644 (file)

index c9ca293..0000000
--- a/patches/kernel/0204-x86-cpu_entry_area-Add-debugstore-entries-to-cpu_ent.patch
+++ /dev/null
@@ -1,244 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon, 4 Dec 2017 15:07:49 +0100
-Subject: [PATCH] x86/cpu_entry_area: Add debugstore entries to cpu_entry_area
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The Intel PEBS/BTS debug store is a design trainwreck as it expects virtual
-addresses which must be visible in any execution context.
-
-So it is required to make these mappings visible to user space when kernel
-page table isolation is active.
-
-Provide enough room for the buffer mappings in the cpu_entry_area so the
-buffers are available in the user space visible page tables.
-
-At the point where the kernel side entry area is populated there is no
-buffer available yet, but the kernel PMD must be populated. To achieve this
-set the entries for these buffers to non present.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 10043e02db7f8a4161f76434931051e7d797a5f6)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 4b9996f9c2d35d23a9fa2afe4f161402e6f28309)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/events/perf_event.h          | 21 ++------------------
- arch/x86/include/asm/cpu_entry_area.h | 13 +++++++++++++
- arch/x86/include/asm/intel_ds.h       | 36 +++++++++++++++++++++++++++++++++++
- arch/x86/events/intel/ds.c            |  5 +++--
- arch/x86/mm/cpu_entry_area.c          | 27 ++++++++++++++++++++++++++
- 5 files changed, 81 insertions(+), 21 deletions(-)
- create mode 100644 arch/x86/include/asm/intel_ds.h
-
-diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
-index 590eaf7c2c3e..308bc14f58af 100644
---- a/arch/x86/events/perf_event.h
-+++ b/arch/x86/events/perf_event.h
-@@ -14,6 +14,8 @@
- 
- #include <linux/perf_event.h>
- 
-+#include <asm/intel_ds.h>
-+
- /* To enable MSR tracing please use the generic trace points. */
- 
- /*
-@@ -77,8 +79,6 @@ struct amd_nb {
-       struct event_constraint event_constraints[X86_PMC_IDX_MAX];
- };
- 
--/* The maximal number of PEBS events: */
--#define MAX_PEBS_EVENTS               8
- #define PEBS_COUNTER_MASK     ((1ULL << MAX_PEBS_EVENTS) - 1)
- 
- /*
-@@ -95,23 +95,6 @@ struct amd_nb {
-       PERF_SAMPLE_TRANSACTION | \
-       PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
- 
--/*
-- * A debug store configuration.
-- *
-- * We only support architectures that use 64bit fields.
-- */
--struct debug_store {
--      u64     bts_buffer_base;
--      u64     bts_index;
--      u64     bts_absolute_maximum;
--      u64     bts_interrupt_threshold;
--      u64     pebs_buffer_base;
--      u64     pebs_index;
--      u64     pebs_absolute_maximum;
--      u64     pebs_interrupt_threshold;
--      u64     pebs_event_reset[MAX_PEBS_EVENTS];
--};
--
- #define PEBS_REGS \
-       (PERF_REG_X86_AX | \
-        PERF_REG_X86_BX | \
-diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
-index 2fbc69a0916e..4a7884b8dca5 100644
---- a/arch/x86/include/asm/cpu_entry_area.h
-+++ b/arch/x86/include/asm/cpu_entry_area.h
-@@ -5,6 +5,7 @@
- 
- #include <linux/percpu-defs.h>
- #include <asm/processor.h>
-+#include <asm/intel_ds.h>
- 
- /*
-  * cpu_entry_area is a percpu region that contains things needed by the CPU
-@@ -40,6 +41,18 @@ struct cpu_entry_area {
-        */
-       char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
- #endif
-+#ifdef CONFIG_CPU_SUP_INTEL
-+      /*
-+       * Per CPU debug store for Intel performance monitoring. Wastes a
-+       * full page at the moment.
-+       */
-+      struct debug_store cpu_debug_store;
-+      /*
-+       * The actual PEBS/BTS buffers must be mapped to user space
-+       * Reserve enough fixmap PTEs.
-+       */
-+      struct debug_store_buffers cpu_debug_buffers;
-+#endif
- };
- 
- #define CPU_ENTRY_AREA_SIZE   (sizeof(struct cpu_entry_area))
-diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
-new file mode 100644
-index 000000000000..62a9f4966b42
---- /dev/null
-+++ b/arch/x86/include/asm/intel_ds.h
-@@ -0,0 +1,36 @@
-+#ifndef _ASM_INTEL_DS_H
-+#define _ASM_INTEL_DS_H
-+
-+#include <linux/percpu-defs.h>
-+
-+#define BTS_BUFFER_SIZE               (PAGE_SIZE << 4)
-+#define PEBS_BUFFER_SIZE      (PAGE_SIZE << 4)
-+
-+/* The maximal number of PEBS events: */
-+#define MAX_PEBS_EVENTS               8
-+
-+/*
-+ * A debug store configuration.
-+ *
-+ * We only support architectures that use 64bit fields.
-+ */
-+struct debug_store {
-+      u64     bts_buffer_base;
-+      u64     bts_index;
-+      u64     bts_absolute_maximum;
-+      u64     bts_interrupt_threshold;
-+      u64     pebs_buffer_base;
-+      u64     pebs_index;
-+      u64     pebs_absolute_maximum;
-+      u64     pebs_interrupt_threshold;
-+      u64     pebs_event_reset[MAX_PEBS_EVENTS];
-+} __aligned(PAGE_SIZE);
-+
-+DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
-+
-+struct debug_store_buffers {
-+      char    bts_buffer[BTS_BUFFER_SIZE];
-+      char    pebs_buffer[PEBS_BUFFER_SIZE];
-+};
-+
-+#endif
-diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
-index 98e36e0c791c..21a4ed789ec0 100644
---- a/arch/x86/events/intel/ds.c
-+++ b/arch/x86/events/intel/ds.c
-@@ -7,11 +7,12 @@
- 
- #include "../perf_event.h"
- 
-+/* Waste a full page so it can be mapped into the cpu_entry_area */
-+DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
-+
- /* The size of a BTS record in bytes: */
- #define BTS_RECORD_SIZE               24
- 
--#define BTS_BUFFER_SIZE               (PAGE_SIZE << 4)
--#define PEBS_BUFFER_SIZE      (PAGE_SIZE << 4)
- #define PEBS_FIXUP_SIZE               PAGE_SIZE
- 
- /*
-diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
-index fe814fd5e014..b9283cc27622 100644
---- a/arch/x86/mm/cpu_entry_area.c
-+++ b/arch/x86/mm/cpu_entry_area.c
-@@ -38,6 +38,32 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
-               cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
- }
- 
-+static void percpu_setup_debug_store(int cpu)
-+{
-+#ifdef CONFIG_CPU_SUP_INTEL
-+      int npages;
-+      void *cea;
-+
-+      if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
-+              return;
-+
-+      cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
-+      npages = sizeof(struct debug_store) / PAGE_SIZE;
-+      BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
-+      cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
-+                           PAGE_KERNEL);
-+
-+      cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
-+      /*
-+       * Force the population of PMDs for not yet allocated per cpu
-+       * memory like debug store buffers.
-+       */
-+      npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
-+      for (; npages; npages--, cea += PAGE_SIZE)
-+              cea_set_pte(cea, 0, PAGE_NONE);
-+#endif
-+}
-+
- /* Setup the fixmap mappings only once per-processor */
- static void __init setup_cpu_entry_area(int cpu)
- {
-@@ -109,6 +135,7 @@ static void __init setup_cpu_entry_area(int cpu)
-       cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
-                    __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
- #endif
-+      percpu_setup_debug_store(cpu);
- }
- 
- static __init void setup_cpu_entry_area_ptes(void)
--- 
-2.14.2
-
diff --git a/patches/kernel/0204-x86-mm-pti-Map-ESPFIX-into-user-space.patch b/patches/kernel/0204-x86-mm-pti-Map-ESPFIX-into-user-space.patch

new file mode 100644 (file)

index 0000000..0db3a23
--- /dev/null
+++ b/patches/kernel/0204-x86-mm-pti-Map-ESPFIX-into-user-space.patch
@@ -0,0 +1,64 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Fri, 15 Dec 2017 22:08:18 +0100
+Subject: [PATCH] x86/mm/pti: Map ESPFIX into user space
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Map the ESPFIX pages into user space when PTI is enabled.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 4b6bbe95b87966ba08999574db65c93c5e925a36)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit f5103cc3035ae6d1816404696ee2eb06d53b6709)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/pti.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
+index 0e78797650a7..b1c38ef9fbbb 100644
+--- a/arch/x86/mm/pti.c
++++ b/arch/x86/mm/pti.c
+@@ -287,6 +287,16 @@ static void __init pti_clone_user_shared(void)
+       pti_clone_p4d(CPU_ENTRY_AREA_BASE);
+ }
+ 
++/*
++ * Clone the ESPFIX P4D into the user space visinble page table
++ */
++static void __init pti_setup_espfix64(void)
++{
++#ifdef CONFIG_X86_ESPFIX64
++      pti_clone_p4d(ESPFIX_BASE_ADDR);
++#endif
++}
++
+ /*
+  * Clone the populated PMDs of the entry and irqentry text and force it RO.
+  */
+@@ -308,4 +318,5 @@ void __init pti_init(void)
+ 
+       pti_clone_user_shared();
+       pti_clone_entry_text();
++      pti_setup_espfix64();
+ }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0205-x86-cpu_entry_area-Add-debugstore-entries-to-cpu_ent.patch b/patches/kernel/0205-x86-cpu_entry_area-Add-debugstore-entries-to-cpu_ent.patch

new file mode 100644 (file)

index 0000000..c9ca293
--- /dev/null
+++ b/patches/kernel/0205-x86-cpu_entry_area-Add-debugstore-entries-to-cpu_ent.patch
@@ -0,0 +1,244 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:49 +0100
+Subject: [PATCH] x86/cpu_entry_area: Add debugstore entries to cpu_entry_area
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The Intel PEBS/BTS debug store is a design trainwreck as it expects virtual
+addresses which must be visible in any execution context.
+
+So it is required to make these mappings visible to user space when kernel
+page table isolation is active.
+
+Provide enough room for the buffer mappings in the cpu_entry_area so the
+buffers are available in the user space visible page tables.
+
+At the point where the kernel side entry area is populated there is no
+buffer available yet, but the kernel PMD must be populated. To achieve this
+set the entries for these buffers to non present.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 10043e02db7f8a4161f76434931051e7d797a5f6)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 4b9996f9c2d35d23a9fa2afe4f161402e6f28309)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/events/perf_event.h          | 21 ++------------------
+ arch/x86/include/asm/cpu_entry_area.h | 13 +++++++++++++
+ arch/x86/include/asm/intel_ds.h       | 36 +++++++++++++++++++++++++++++++++++
+ arch/x86/events/intel/ds.c            |  5 +++--
+ arch/x86/mm/cpu_entry_area.c          | 27 ++++++++++++++++++++++++++
+ 5 files changed, 81 insertions(+), 21 deletions(-)
+ create mode 100644 arch/x86/include/asm/intel_ds.h
+
+diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
+index 590eaf7c2c3e..308bc14f58af 100644
+--- a/arch/x86/events/perf_event.h
++++ b/arch/x86/events/perf_event.h
+@@ -14,6 +14,8 @@
+ 
+ #include <linux/perf_event.h>
+ 
++#include <asm/intel_ds.h>
++
+ /* To enable MSR tracing please use the generic trace points. */
+ 
+ /*
+@@ -77,8 +79,6 @@ struct amd_nb {
+       struct event_constraint event_constraints[X86_PMC_IDX_MAX];
+ };
+ 
+-/* The maximal number of PEBS events: */
+-#define MAX_PEBS_EVENTS               8
+ #define PEBS_COUNTER_MASK     ((1ULL << MAX_PEBS_EVENTS) - 1)
+ 
+ /*
+@@ -95,23 +95,6 @@ struct amd_nb {
+       PERF_SAMPLE_TRANSACTION | \
+       PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
+ 
+-/*
+- * A debug store configuration.
+- *
+- * We only support architectures that use 64bit fields.
+- */
+-struct debug_store {
+-      u64     bts_buffer_base;
+-      u64     bts_index;
+-      u64     bts_absolute_maximum;
+-      u64     bts_interrupt_threshold;
+-      u64     pebs_buffer_base;
+-      u64     pebs_index;
+-      u64     pebs_absolute_maximum;
+-      u64     pebs_interrupt_threshold;
+-      u64     pebs_event_reset[MAX_PEBS_EVENTS];
+-};
+-
+ #define PEBS_REGS \
+       (PERF_REG_X86_AX | \
+        PERF_REG_X86_BX | \
+diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
+index 2fbc69a0916e..4a7884b8dca5 100644
+--- a/arch/x86/include/asm/cpu_entry_area.h
++++ b/arch/x86/include/asm/cpu_entry_area.h
+@@ -5,6 +5,7 @@
+ 
+ #include <linux/percpu-defs.h>
+ #include <asm/processor.h>
++#include <asm/intel_ds.h>
+ 
+ /*
+  * cpu_entry_area is a percpu region that contains things needed by the CPU
+@@ -40,6 +41,18 @@ struct cpu_entry_area {
+        */
+       char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+ #endif
++#ifdef CONFIG_CPU_SUP_INTEL
++      /*
++       * Per CPU debug store for Intel performance monitoring. Wastes a
++       * full page at the moment.
++       */
++      struct debug_store cpu_debug_store;
++      /*
++       * The actual PEBS/BTS buffers must be mapped to user space
++       * Reserve enough fixmap PTEs.
++       */
++      struct debug_store_buffers cpu_debug_buffers;
++#endif
+ };
+ 
+ #define CPU_ENTRY_AREA_SIZE   (sizeof(struct cpu_entry_area))
+diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
+new file mode 100644
+index 000000000000..62a9f4966b42
+--- /dev/null
++++ b/arch/x86/include/asm/intel_ds.h
+@@ -0,0 +1,36 @@
++#ifndef _ASM_INTEL_DS_H
++#define _ASM_INTEL_DS_H
++
++#include <linux/percpu-defs.h>
++
++#define BTS_BUFFER_SIZE               (PAGE_SIZE << 4)
++#define PEBS_BUFFER_SIZE      (PAGE_SIZE << 4)
++
++/* The maximal number of PEBS events: */
++#define MAX_PEBS_EVENTS               8
++
++/*
++ * A debug store configuration.
++ *
++ * We only support architectures that use 64bit fields.
++ */
++struct debug_store {
++      u64     bts_buffer_base;
++      u64     bts_index;
++      u64     bts_absolute_maximum;
++      u64     bts_interrupt_threshold;
++      u64     pebs_buffer_base;
++      u64     pebs_index;
++      u64     pebs_absolute_maximum;
++      u64     pebs_interrupt_threshold;
++      u64     pebs_event_reset[MAX_PEBS_EVENTS];
++} __aligned(PAGE_SIZE);
++
++DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
++
++struct debug_store_buffers {
++      char    bts_buffer[BTS_BUFFER_SIZE];
++      char    pebs_buffer[PEBS_BUFFER_SIZE];
++};
++
++#endif
+diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
+index 98e36e0c791c..21a4ed789ec0 100644
+--- a/arch/x86/events/intel/ds.c
++++ b/arch/x86/events/intel/ds.c
+@@ -7,11 +7,12 @@
+ 
+ #include "../perf_event.h"
+ 
++/* Waste a full page so it can be mapped into the cpu_entry_area */
++DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
++
+ /* The size of a BTS record in bytes: */
+ #define BTS_RECORD_SIZE               24
+ 
+-#define BTS_BUFFER_SIZE               (PAGE_SIZE << 4)
+-#define PEBS_BUFFER_SIZE      (PAGE_SIZE << 4)
+ #define PEBS_FIXUP_SIZE               PAGE_SIZE
+ 
+ /*
+diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
+index fe814fd5e014..b9283cc27622 100644
+--- a/arch/x86/mm/cpu_entry_area.c
++++ b/arch/x86/mm/cpu_entry_area.c
+@@ -38,6 +38,32 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
+               cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
+ }
+ 
++static void percpu_setup_debug_store(int cpu)
++{
++#ifdef CONFIG_CPU_SUP_INTEL
++      int npages;
++      void *cea;
++
++      if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
++              return;
++
++      cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
++      npages = sizeof(struct debug_store) / PAGE_SIZE;
++      BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
++      cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
++                           PAGE_KERNEL);
++
++      cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
++      /*
++       * Force the population of PMDs for not yet allocated per cpu
++       * memory like debug store buffers.
++       */
++      npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
++      for (; npages; npages--, cea += PAGE_SIZE)
++              cea_set_pte(cea, 0, PAGE_NONE);
++#endif
++}
++
+ /* Setup the fixmap mappings only once per-processor */
+ static void __init setup_cpu_entry_area(int cpu)
+ {
+@@ -109,6 +135,7 @@ static void __init setup_cpu_entry_area(int cpu)
+       cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
+                    __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+ #endif
++      percpu_setup_debug_store(cpu);
+ }
+ 
+ static __init void setup_cpu_entry_area_ptes(void)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0205-x86-events-intel-ds-Map-debug-buffers-in-cpu_entry_a.patch b/patches/kernel/0205-x86-events-intel-ds-Map-debug-buffers-in-cpu_entry_a.patch

deleted file mode 100644 (file)

index ef63988..0000000
--- a/patches/kernel/0205-x86-events-intel-ds-Map-debug-buffers-in-cpu_entry_a.patch
+++ /dev/null
@@ -1,280 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Hugh Dickins <hughd@google.com>
-Date: Mon, 4 Dec 2017 15:07:50 +0100
-Subject: [PATCH] x86/events/intel/ds: Map debug buffers in cpu_entry_area
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The BTS and PEBS buffers both have their virtual addresses programmed into
-the hardware.  This means that any access to them is performed via the page
-tables.  The times that the hardware accesses these are entirely dependent
-on how the performance monitoring hardware events are set up.  In other
-words, there is no way for the kernel to tell when the hardware might
-access these buffers.
-
-To avoid perf crashes, place 'debug_store' allocate pages and map them into
-the cpu_entry_area.
-
-The PEBS fixup buffer does not need this treatment.
-
-[ tglx: Got rid of the kaiser_add_mapping() complication ]
-
-Signed-off-by: Hugh Dickins <hughd@google.com>
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit c1961a4631daef4aeabee8e368b1b13e8f173c91)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 569dedbb62e16e3268f006dcf745b8d27690ef91)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/events/perf_event.h |   2 +
- arch/x86/events/intel/ds.c   | 125 +++++++++++++++++++++++++++----------------
- 2 files changed, 82 insertions(+), 45 deletions(-)
-
-diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
-index 308bc14f58af..eb0876475f18 100644
---- a/arch/x86/events/perf_event.h
-+++ b/arch/x86/events/perf_event.h
-@@ -199,6 +199,8 @@ struct cpu_hw_events {
-        * Intel DebugStore bits
-        */
-       struct debug_store      *ds;
-+      void                    *ds_pebs_vaddr;
-+      void                    *ds_bts_vaddr;
-       u64                     pebs_enabled;
-       int                     n_pebs;
-       int                     n_large_pebs;
-diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
-index 21a4ed789ec0..85df1f12c49e 100644
---- a/arch/x86/events/intel/ds.c
-+++ b/arch/x86/events/intel/ds.c
-@@ -2,6 +2,7 @@
- #include <linux/types.h>
- #include <linux/slab.h>
- 
-+#include <asm/cpu_entry_area.h>
- #include <asm/perf_event.h>
- #include <asm/insn.h>
- 
-@@ -279,17 +280,52 @@ void fini_debug_store_on_cpu(int cpu)
- 
- static DEFINE_PER_CPU(void *, insn_buffer);
- 
--static int alloc_pebs_buffer(int cpu)
-+static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
- {
--      struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-+      phys_addr_t pa;
-+      size_t msz = 0;
-+
-+      pa = virt_to_phys(addr);
-+      for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
-+              cea_set_pte(cea, pa, prot);
-+}
-+
-+static void ds_clear_cea(void *cea, size_t size)
-+{
-+      size_t msz = 0;
-+
-+      for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
-+              cea_set_pte(cea, 0, PAGE_NONE);
-+}
-+
-+static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
-+{
-+      unsigned int order = get_order(size);
-       int node = cpu_to_node(cpu);
--      int max;
--      void *buffer, *ibuffer;
-+      struct page *page;
-+
-+      page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
-+      return page ? page_address(page) : NULL;
-+}
-+
-+static void dsfree_pages(const void *buffer, size_t size)
-+{
-+      if (buffer)
-+              free_pages((unsigned long)buffer, get_order(size));
-+}
-+
-+static int alloc_pebs_buffer(int cpu)
-+{
-+      struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
-+      struct debug_store *ds = hwev->ds;
-+      size_t bsiz = x86_pmu.pebs_buffer_size;
-+      int max, node = cpu_to_node(cpu);
-+      void *buffer, *ibuffer, *cea;
- 
-       if (!x86_pmu.pebs)
-               return 0;
- 
--      buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
-+      buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
-       if (unlikely(!buffer))
-               return -ENOMEM;
- 
-@@ -300,25 +336,27 @@ static int alloc_pebs_buffer(int cpu)
-       if (x86_pmu.intel_cap.pebs_format < 2) {
-               ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
-               if (!ibuffer) {
--                      kfree(buffer);
-+                      dsfree_pages(buffer, bsiz);
-                       return -ENOMEM;
-               }
-               per_cpu(insn_buffer, cpu) = ibuffer;
-       }
--
--      max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
--
--      ds->pebs_buffer_base = (u64)(unsigned long)buffer;
-+      hwev->ds_pebs_vaddr = buffer;
-+      /* Update the cpu entry area mapping */
-+      cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
-+      ds->pebs_buffer_base = (unsigned long) cea;
-+      ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
-       ds->pebs_index = ds->pebs_buffer_base;
--      ds->pebs_absolute_maximum = ds->pebs_buffer_base +
--              max * x86_pmu.pebs_record_size;
--
-+      max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
-+      ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
-       return 0;
- }
- 
- static void release_pebs_buffer(int cpu)
- {
--      struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-+      struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
-+      struct debug_store *ds = hwev->ds;
-+      void *cea;
- 
-       if (!ds || !x86_pmu.pebs)
-               return;
-@@ -326,73 +364,70 @@ static void release_pebs_buffer(int cpu)
-       kfree(per_cpu(insn_buffer, cpu));
-       per_cpu(insn_buffer, cpu) = NULL;
- 
--      kfree((void *)(unsigned long)ds->pebs_buffer_base);
-+      /* Clear the fixmap */
-+      cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
-+      ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
-       ds->pebs_buffer_base = 0;
-+      dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
-+      hwev->ds_pebs_vaddr = NULL;
- }
- 
- static int alloc_bts_buffer(int cpu)
- {
--      struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
--      int node = cpu_to_node(cpu);
--      int max, thresh;
--      void *buffer;
-+      struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
-+      struct debug_store *ds = hwev->ds;
-+      void *buffer, *cea;
-+      int max;
- 
-       if (!x86_pmu.bts)
-               return 0;
- 
--      buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
-+      buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
-       if (unlikely(!buffer)) {
-               WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
-               return -ENOMEM;
-       }
--
--      max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
--      thresh = max / 16;
--
--      ds->bts_buffer_base = (u64)(unsigned long)buffer;
-+      hwev->ds_bts_vaddr = buffer;
-+      /* Update the fixmap */
-+      cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
-+      ds->bts_buffer_base = (unsigned long) cea;
-+      ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
-       ds->bts_index = ds->bts_buffer_base;
--      ds->bts_absolute_maximum = ds->bts_buffer_base +
--              max * BTS_RECORD_SIZE;
--      ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
--              thresh * BTS_RECORD_SIZE;
--
-+      max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
-+      ds->bts_absolute_maximum = ds->bts_buffer_base + max;
-+      ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
-       return 0;
- }
- 
- static void release_bts_buffer(int cpu)
- {
--      struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-+      struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
-+      struct debug_store *ds = hwev->ds;
-+      void *cea;
- 
-       if (!ds || !x86_pmu.bts)
-               return;
- 
--      kfree((void *)(unsigned long)ds->bts_buffer_base);
-+      /* Clear the fixmap */
-+      cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
-+      ds_clear_cea(cea, BTS_BUFFER_SIZE);
-       ds->bts_buffer_base = 0;
-+      dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
-+      hwev->ds_bts_vaddr = NULL;
- }
- 
- static int alloc_ds_buffer(int cpu)
- {
--      int node = cpu_to_node(cpu);
--      struct debug_store *ds;
--
--      ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
--      if (unlikely(!ds))
--              return -ENOMEM;
-+      struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
- 
-+      memset(ds, 0, sizeof(*ds));
-       per_cpu(cpu_hw_events, cpu).ds = ds;
--
-       return 0;
- }
- 
- static void release_ds_buffer(int cpu)
- {
--      struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
--
--      if (!ds)
--              return;
--
-       per_cpu(cpu_hw_events, cpu).ds = NULL;
--      kfree(ds);
- }
- 
- void release_ds_buffers(void)
--- 
-2.14.2
-
diff --git a/patches/kernel/0206-x86-events-intel-ds-Map-debug-buffers-in-cpu_entry_a.patch b/patches/kernel/0206-x86-events-intel-ds-Map-debug-buffers-in-cpu_entry_a.patch

new file mode 100644 (file)

index 0000000..ef63988
--- /dev/null
+++ b/patches/kernel/0206-x86-events-intel-ds-Map-debug-buffers-in-cpu_entry_a.patch
@@ -0,0 +1,280 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 4 Dec 2017 15:07:50 +0100
+Subject: [PATCH] x86/events/intel/ds: Map debug buffers in cpu_entry_area
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The BTS and PEBS buffers both have their virtual addresses programmed into
+the hardware.  This means that any access to them is performed via the page
+tables.  The times that the hardware accesses these are entirely dependent
+on how the performance monitoring hardware events are set up.  In other
+words, there is no way for the kernel to tell when the hardware might
+access these buffers.
+
+To avoid perf crashes, place 'debug_store' in the cpu_entry_area, allocate
+pages for the BTS and PEBS buffers and map them into the cpu_entry_area too.
+
+The PEBS fixup buffer does not need this treatment.
+
+[ tglx: Got rid of the kaiser_add_mapping() complication ]
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit c1961a4631daef4aeabee8e368b1b13e8f173c91)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 569dedbb62e16e3268f006dcf745b8d27690ef91)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/events/perf_event.h |   2 +
+ arch/x86/events/intel/ds.c   | 125 +++++++++++++++++++++++++++----------------
+ 2 files changed, 82 insertions(+), 45 deletions(-)
+
+diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
+index 308bc14f58af..eb0876475f18 100644
+--- a/arch/x86/events/perf_event.h
++++ b/arch/x86/events/perf_event.h
+@@ -199,6 +199,8 @@ struct cpu_hw_events {
+        * Intel DebugStore bits
+        */
+       struct debug_store      *ds;
++      void                    *ds_pebs_vaddr;
++      void                    *ds_bts_vaddr;
+       u64                     pebs_enabled;
+       int                     n_pebs;
+       int                     n_large_pebs;
+diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
+index 21a4ed789ec0..85df1f12c49e 100644
+--- a/arch/x86/events/intel/ds.c
++++ b/arch/x86/events/intel/ds.c
+@@ -2,6 +2,7 @@
+ #include <linux/types.h>
+ #include <linux/slab.h>
+ 
++#include <asm/cpu_entry_area.h>
+ #include <asm/perf_event.h>
+ #include <asm/insn.h>
+ 
+@@ -279,17 +280,52 @@ void fini_debug_store_on_cpu(int cpu)
+ 
+ static DEFINE_PER_CPU(void *, insn_buffer);
+ 
+-static int alloc_pebs_buffer(int cpu)
++static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
+ {
+-      struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
++      phys_addr_t pa;
++      size_t msz = 0;
++
++      pa = virt_to_phys(addr);
++      for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
++              cea_set_pte(cea, pa, prot);
++}
++
++static void ds_clear_cea(void *cea, size_t size)
++{
++      size_t msz = 0;
++
++      for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
++              cea_set_pte(cea, 0, PAGE_NONE);
++}
++
++static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
++{
++      unsigned int order = get_order(size);
+       int node = cpu_to_node(cpu);
+-      int max;
+-      void *buffer, *ibuffer;
++      struct page *page;
++
++      page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
++      return page ? page_address(page) : NULL;
++}
++
++static void dsfree_pages(const void *buffer, size_t size)
++{
++      if (buffer)
++              free_pages((unsigned long)buffer, get_order(size));
++}
++
++static int alloc_pebs_buffer(int cpu)
++{
++      struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
++      struct debug_store *ds = hwev->ds;
++      size_t bsiz = x86_pmu.pebs_buffer_size;
++      int max, node = cpu_to_node(cpu);
++      void *buffer, *ibuffer, *cea;
+ 
+       if (!x86_pmu.pebs)
+               return 0;
+ 
+-      buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
++      buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
+       if (unlikely(!buffer))
+               return -ENOMEM;
+ 
+@@ -300,25 +336,27 @@ static int alloc_pebs_buffer(int cpu)
+       if (x86_pmu.intel_cap.pebs_format < 2) {
+               ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+               if (!ibuffer) {
+-                      kfree(buffer);
++                      dsfree_pages(buffer, bsiz);
+                       return -ENOMEM;
+               }
+               per_cpu(insn_buffer, cpu) = ibuffer;
+       }
+-
+-      max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
+-
+-      ds->pebs_buffer_base = (u64)(unsigned long)buffer;
++      hwev->ds_pebs_vaddr = buffer;
++      /* Update the cpu entry area mapping */
++      cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
++      ds->pebs_buffer_base = (unsigned long) cea;
++      ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
+       ds->pebs_index = ds->pebs_buffer_base;
+-      ds->pebs_absolute_maximum = ds->pebs_buffer_base +
+-              max * x86_pmu.pebs_record_size;
+-
++      max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
++      ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
+       return 0;
+ }
+ 
+ static void release_pebs_buffer(int cpu)
+ {
+-      struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
++      struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
++      struct debug_store *ds = hwev->ds;
++      void *cea;
+ 
+       if (!ds || !x86_pmu.pebs)
+               return;
+@@ -326,73 +364,70 @@ static void release_pebs_buffer(int cpu)
+       kfree(per_cpu(insn_buffer, cpu));
+       per_cpu(insn_buffer, cpu) = NULL;
+ 
+-      kfree((void *)(unsigned long)ds->pebs_buffer_base);
++      /* Clear the fixmap */
++      cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
++      ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
+       ds->pebs_buffer_base = 0;
++      dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
++      hwev->ds_pebs_vaddr = NULL;
+ }
+ 
+ static int alloc_bts_buffer(int cpu)
+ {
+-      struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+-      int node = cpu_to_node(cpu);
+-      int max, thresh;
+-      void *buffer;
++      struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
++      struct debug_store *ds = hwev->ds;
++      void *buffer, *cea;
++      int max;
+ 
+       if (!x86_pmu.bts)
+               return 0;
+ 
+-      buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
++      buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
+       if (unlikely(!buffer)) {
+               WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
+               return -ENOMEM;
+       }
+-
+-      max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
+-      thresh = max / 16;
+-
+-      ds->bts_buffer_base = (u64)(unsigned long)buffer;
++      hwev->ds_bts_vaddr = buffer;
++      /* Update the fixmap */
++      cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
++      ds->bts_buffer_base = (unsigned long) cea;
++      ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
+       ds->bts_index = ds->bts_buffer_base;
+-      ds->bts_absolute_maximum = ds->bts_buffer_base +
+-              max * BTS_RECORD_SIZE;
+-      ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
+-              thresh * BTS_RECORD_SIZE;
+-
++      max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
++      ds->bts_absolute_maximum = ds->bts_buffer_base + max;
++      ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
+       return 0;
+ }
+ 
+ static void release_bts_buffer(int cpu)
+ {
+-      struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
++      struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
++      struct debug_store *ds = hwev->ds;
++      void *cea;
+ 
+       if (!ds || !x86_pmu.bts)
+               return;
+ 
+-      kfree((void *)(unsigned long)ds->bts_buffer_base);
++      /* Clear the fixmap */
++      cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
++      ds_clear_cea(cea, BTS_BUFFER_SIZE);
+       ds->bts_buffer_base = 0;
++      dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
++      hwev->ds_bts_vaddr = NULL;
+ }
+ 
+ static int alloc_ds_buffer(int cpu)
+ {
+-      int node = cpu_to_node(cpu);
+-      struct debug_store *ds;
+-
+-      ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
+-      if (unlikely(!ds))
+-              return -ENOMEM;
++      struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
+ 
++      memset(ds, 0, sizeof(*ds));
+       per_cpu(cpu_hw_events, cpu).ds = ds;
+-
+       return 0;
+ }
+ 
+ static void release_ds_buffer(int cpu)
+ {
+-      struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+-
+-      if (!ds)
+-              return;
+-
+       per_cpu(cpu_hw_events, cpu).ds = NULL;
+-      kfree(ds);
+ }
+ 
+ void release_ds_buffers(void)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0206-x86-mm-64-Make-a-full-PGD-entry-size-hole-in-the-mem.patch b/patches/kernel/0206-x86-mm-64-Make-a-full-PGD-entry-size-hole-in-the-mem.patch

deleted file mode 100644 (file)

index e408245..0000000
--- a/patches/kernel/0206-x86-mm-64-Make-a-full-PGD-entry-size-hole-in-the-mem.patch
+++ /dev/null
@@ -1,74 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Tue, 12 Dec 2017 07:56:44 -0800
-Subject: [PATCH] x86/mm/64: Make a full PGD-entry size hole in the memory map
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Shrink vmalloc space from 16384TiB to 12800TiB to enlarge the hole starting
-at 0xff90000000000000 to be a full PGD entry.
-
-A subsequent patch will use this hole for the pagetable isolation LDT
-alias.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Kees Cook <keescook@chromium.org>
-Cc: Kirill A. Shutemov <kirill@shutemov.name>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 9f449772a3106bcdd4eb8fdeb281147b0e99fb30)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 29b1c137d449dfc8fdcb476158f236625691fd28)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/x86/x86_64/mm.txt         | 4 ++--
- arch/x86/include/asm/pgtable_64_types.h | 4 ++--
- 2 files changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
-index 51101708a03a..496a1dbf139d 100644
---- a/Documentation/x86/x86_64/mm.txt
-+++ b/Documentation/x86/x86_64/mm.txt
-@@ -29,8 +29,8 @@ Virtual memory map with 5 level page tables:
- hole caused by [56:63] sign extension
- ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
- ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
--ff90000000000000 - ff91ffffffffffff (=49 bits) hole
--ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
-+ff90000000000000 - ff9fffffffffffff (=52 bits) hole
-+ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
- ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
- ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
- ... unused hole ...
-diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
-index 42e2750da525..5932dead34ee 100644
---- a/arch/x86/include/asm/pgtable_64_types.h
-+++ b/arch/x86/include/asm/pgtable_64_types.h
-@@ -78,8 +78,8 @@ typedef struct { pteval_t pte; } pte_t;
- #define MAXMEM                        _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
- 
- #ifdef CONFIG_X86_5LEVEL
--# define VMALLOC_SIZE_TB      _AC(16384, UL)
--# define __VMALLOC_BASE               _AC(0xff92000000000000, UL)
-+# define VMALLOC_SIZE_TB      _AC(12800, UL)
-+# define __VMALLOC_BASE               _AC(0xffa0000000000000, UL)
- # define __VMEMMAP_BASE               _AC(0xffd4000000000000, UL)
- #else
- # define VMALLOC_SIZE_TB      _AC(32, UL)
--- 
-2.14.2
-
diff --git a/patches/kernel/0207-x86-mm-64-Make-a-full-PGD-entry-size-hole-in-the-mem.patch b/patches/kernel/0207-x86-mm-64-Make-a-full-PGD-entry-size-hole-in-the-mem.patch

new file mode 100644 (file)

index 0000000..e408245
--- /dev/null
+++ b/patches/kernel/0207-x86-mm-64-Make-a-full-PGD-entry-size-hole-in-the-mem.patch
@@ -0,0 +1,74 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 12 Dec 2017 07:56:44 -0800
+Subject: [PATCH] x86/mm/64: Make a full PGD-entry size hole in the memory map
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Shrink vmalloc space from 16384TiB to 12800TiB to enlarge the hole starting
+at 0xff90000000000000 to be a full PGD entry.
+
+A subsequent patch will use this hole for the pagetable isolation LDT
+alias.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Kirill A. Shutemov <kirill@shutemov.name>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 9f449772a3106bcdd4eb8fdeb281147b0e99fb30)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 29b1c137d449dfc8fdcb476158f236625691fd28)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/x86/x86_64/mm.txt         | 4 ++--
+ arch/x86/include/asm/pgtable_64_types.h | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
+index 51101708a03a..496a1dbf139d 100644
+--- a/Documentation/x86/x86_64/mm.txt
++++ b/Documentation/x86/x86_64/mm.txt
+@@ -29,8 +29,8 @@ Virtual memory map with 5 level page tables:
+ hole caused by [56:63] sign extension
+ ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
+ ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
+-ff90000000000000 - ff91ffffffffffff (=49 bits) hole
+-ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
++ff90000000000000 - ff9fffffffffffff (=52 bits) hole
++ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
+ ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
+ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
+ ... unused hole ...
+diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
+index 42e2750da525..5932dead34ee 100644
+--- a/arch/x86/include/asm/pgtable_64_types.h
++++ b/arch/x86/include/asm/pgtable_64_types.h
+@@ -78,8 +78,8 @@ typedef struct { pteval_t pte; } pte_t;
+ #define MAXMEM                        _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+ 
+ #ifdef CONFIG_X86_5LEVEL
+-# define VMALLOC_SIZE_TB      _AC(16384, UL)
+-# define __VMALLOC_BASE               _AC(0xff92000000000000, UL)
++# define VMALLOC_SIZE_TB      _AC(12800, UL)
++# define __VMALLOC_BASE               _AC(0xffa0000000000000, UL)
+ # define __VMEMMAP_BASE               _AC(0xffd4000000000000, UL)
+ #else
+ # define VMALLOC_SIZE_TB      _AC(32, UL)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0207-x86-pti-Put-the-LDT-in-its-own-PGD-if-PTI-is-on.patch b/patches/kernel/0207-x86-pti-Put-the-LDT-in-its-own-PGD-if-PTI-is-on.patch

deleted file mode 100644 (file)

index 74b842f..0000000
--- a/patches/kernel/0207-x86-pti-Put-the-LDT-in-its-own-PGD-if-PTI-is-on.patch
+++ /dev/null
@@ -1,466 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Tue, 12 Dec 2017 07:56:45 -0800
-Subject: [PATCH] x86/pti: Put the LDT in its own PGD if PTI is on
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-With PTI enabled, the LDT must be mapped in the usermode tables somewhere.
-The LDT is per process, i.e. per mm.
-
-An earlier approach mapped the LDT on context switch into a fixmap area,
-but that's a big overhead and exhausted the fixmap space when NR_CPUS got
-big.
-
-Take advantage of the fact that there is an address space hole which
-provides a completely unused pgd. Use this pgd to manage per-mm LDT
-mappings.
-
-This has a down side: the LDT isn't (currently) randomized, and an attack
-that can write the LDT is instant root due to call gates (thanks, AMD, for
-leaving call gates in AMD64 but designing them wrong so they're only useful
-for exploits).  This can be mitigated by making the LDT read-only or
-randomizing the mapping, either of which is strightforward on top of this
-patch.
-
-This will significantly slow down LDT users, but that shouldn't matter for
-important workloads -- the LDT is only used by DOSEMU(2), Wine, and very
-old libc implementations.
-
-[ tglx: Cleaned it up. ]
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Kees Cook <keescook@chromium.org>
-Cc: Kirill A. Shutemov <kirill@shutemov.name>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit f55f0501cbf65ec41cca5058513031b711730b1d)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit c250643846b45ea6782fb0cfcc15e8cd34744bc7)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/x86/x86_64/mm.txt         |   3 +-
- arch/x86/include/asm/mmu_context.h      |  59 ++++++++++++--
- arch/x86/include/asm/pgtable_64_types.h |   4 +
- arch/x86/include/asm/processor.h        |  23 ++++--
- arch/x86/kernel/ldt.c                   | 139 +++++++++++++++++++++++++++++++-
- arch/x86/mm/dump_pagetables.c           |   9 +++
- 6 files changed, 220 insertions(+), 17 deletions(-)
-
-diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
-index 496a1dbf139d..ad41b3813f0a 100644
---- a/Documentation/x86/x86_64/mm.txt
-+++ b/Documentation/x86/x86_64/mm.txt
-@@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
- ... unused hole ...
- ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
- ... unused hole ...
-+fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI
- fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
- ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
- ... unused hole ...
-@@ -29,7 +30,7 @@ Virtual memory map with 5 level page tables:
- hole caused by [56:63] sign extension
- ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
- ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
--ff90000000000000 - ff9fffffffffffff (=52 bits) hole
-+ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI
- ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
- ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
- ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
-diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
-index 89a01ad7e370..9e3546e1c0f4 100644
---- a/arch/x86/include/asm/mmu_context.h
-+++ b/arch/x86/include/asm/mmu_context.h
-@@ -49,10 +49,33 @@ struct ldt_struct {
-        * call gates.  On native, we could merge the ldt_struct and LDT
-        * allocations, but it's not worth trying to optimize.
-        */
--      struct desc_struct *entries;
--      unsigned int nr_entries;
-+      struct desc_struct      *entries;
-+      unsigned int            nr_entries;
-+
-+      /*
-+       * If PTI is in use, then the entries array is not mapped while we're
-+       * in user mode.  The whole array will be aliased at the addressed
-+       * given by ldt_slot_va(slot).  We use two slots so that we can allocate
-+       * and map, and enable a new LDT without invalidating the mapping
-+       * of an older, still-in-use LDT.
-+       *
-+       * slot will be -1 if this LDT doesn't have an alias mapping.
-+       */
-+      int                     slot;
- };
- 
-+/* This is a multiple of PAGE_SIZE. */
-+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
-+
-+static inline void *ldt_slot_va(int slot)
-+{
-+#ifdef CONFIG_X86_64
-+      return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
-+#else
-+      BUG();
-+#endif
-+}
-+
- /*
-  * Used for LDT copy/destruction.
-  */
-@@ -63,6 +86,7 @@ static inline void init_new_context_ldt(struct mm_struct *mm)
- }
- int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
- void destroy_context_ldt(struct mm_struct *mm);
-+void ldt_arch_exit_mmap(struct mm_struct *mm);
- #else /* CONFIG_MODIFY_LDT_SYSCALL */
- static inline void init_new_context_ldt(struct mm_struct *mm) { }
- static inline int ldt_dup_context(struct mm_struct *oldmm,
-@@ -70,7 +94,8 @@ static inline int ldt_dup_context(struct mm_struct *oldmm,
- {
-       return 0;
- }
--static inline void destroy_context_ldt(struct mm_struct *mm) {}
-+static inline void destroy_context_ldt(struct mm_struct *mm) { }
-+static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
- #endif
- 
- static inline void load_mm_ldt(struct mm_struct *mm)
-@@ -95,10 +120,31 @@ static inline void load_mm_ldt(struct mm_struct *mm)
-        * that we can see.
-        */
- 
--      if (unlikely(ldt))
--              set_ldt(ldt->entries, ldt->nr_entries);
--      else
-+      if (unlikely(ldt)) {
-+              if (static_cpu_has(X86_FEATURE_PTI)) {
-+                      if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
-+                              /*
-+                               * Whoops -- either the new LDT isn't mapped
-+                               * (if slot == -1) or is mapped into a bogus
-+                               * slot (if slot > 1).
-+                               */
-+                              clear_LDT();
-+                              return;
-+                      }
-+
-+                      /*
-+                       * If page table isolation is enabled, ldt->entries
-+                       * will not be mapped in the userspace pagetables.
-+                       * Tell the CPU to access the LDT through the alias
-+                       * at ldt_slot_va(ldt->slot).
-+                       */
-+                      set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
-+              } else {
-+                      set_ldt(ldt->entries, ldt->nr_entries);
-+              }
-+      } else {
-               clear_LDT();
-+      }
- #else
-       clear_LDT();
- #endif
-@@ -193,6 +239,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
- static inline void arch_exit_mmap(struct mm_struct *mm)
- {
-       paravirt_arch_exit_mmap(mm);
-+      ldt_arch_exit_mmap(mm);
- }
- 
- #ifdef CONFIG_X86_64
-diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
-index 5932dead34ee..e8a809ee0bb6 100644
---- a/arch/x86/include/asm/pgtable_64_types.h
-+++ b/arch/x86/include/asm/pgtable_64_types.h
-@@ -81,10 +81,14 @@ typedef struct { pteval_t pte; } pte_t;
- # define VMALLOC_SIZE_TB      _AC(12800, UL)
- # define __VMALLOC_BASE               _AC(0xffa0000000000000, UL)
- # define __VMEMMAP_BASE               _AC(0xffd4000000000000, UL)
-+# define LDT_PGD_ENTRY                _AC(-112, UL)
-+# define LDT_BASE_ADDR                (LDT_PGD_ENTRY << PGDIR_SHIFT)
- #else
- # define VMALLOC_SIZE_TB      _AC(32, UL)
- # define __VMALLOC_BASE               _AC(0xffffc90000000000, UL)
- # define __VMEMMAP_BASE               _AC(0xffffea0000000000, UL)
-+# define LDT_PGD_ENTRY                _AC(-4, UL)
-+# define LDT_BASE_ADDR                (LDT_PGD_ENTRY << PGDIR_SHIFT)
- #endif
- 
- #ifdef CONFIG_RANDOMIZE_MEMORY
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index 935d68609922..24503521c947 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -843,13 +843,22 @@ static inline void spin_lock_prefetch(const void *x)
- 
- #else
- /*
-- * User space process size. 47bits minus one guard page.  The guard
-- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
-- * the highest possible canonical userspace address, then that
-- * syscall will enter the kernel with a non-canonical return
-- * address, and SYSRET will explode dangerously.  We avoid this
-- * particular problem by preventing anything from being mapped
-- * at the maximum canonical address.
-+ * User space process size.  This is the first address outside the user range.
-+ * There are a few constraints that determine this:
-+ *
-+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
-+ * address, then that syscall will enter the kernel with a
-+ * non-canonical return address, and SYSRET will explode dangerously.
-+ * We avoid this particular problem by preventing anything executable
-+ * from being mapped at the maximum canonical address.
-+ *
-+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
-+ * CPUs malfunction if they execute code from the highest canonical page.
-+ * They'll speculate right off the end of the canonical space, and
-+ * bad things happen.  This is worked around in the same way as the
-+ * Intel problem.
-+ *
-+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
-  */
- #define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
- 
-diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
-index 74a5aaf13f3c..eceaada581ff 100644
---- a/arch/x86/kernel/ldt.c
-+++ b/arch/x86/kernel/ldt.c
-@@ -23,6 +23,7 @@
- #include <linux/uaccess.h>
- 
- #include <asm/ldt.h>
-+#include <asm/tlb.h>
- #include <asm/desc.h>
- #include <asm/mmu_context.h>
- #include <asm/syscalls.h>
-@@ -50,13 +51,11 @@ static void refresh_ldt_segments(void)
- static void flush_ldt(void *__mm)
- {
-       struct mm_struct *mm = __mm;
--      mm_context_t *pc;
- 
-       if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
-               return;
- 
--      pc = &mm->context;
--      set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
-+      load_mm_ldt(mm);
- 
-       refresh_ldt_segments();
- }
-@@ -93,10 +92,121 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
-               return NULL;
-       }
- 
-+      /* The new LDT isn't aliased for PTI yet. */
-+      new_ldt->slot = -1;
-+
-       new_ldt->nr_entries = num_entries;
-       return new_ldt;
- }
- 
-+/*
-+ * If PTI is enabled, this maps the LDT into the kernelmode and
-+ * usermode tables for the given mm.
-+ *
-+ * There is no corresponding unmap function.  Even if the LDT is freed, we
-+ * leave the PTEs around until the slot is reused or the mm is destroyed.
-+ * This is harmless: the LDT is always in ordinary memory, and no one will
-+ * access the freed slot.
-+ *
-+ * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
-+ * it useful, and the flush would slow down modify_ldt().
-+ */
-+static int
-+map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
-+{
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+      bool is_vmalloc, had_top_level_entry;
-+      unsigned long va;
-+      spinlock_t *ptl;
-+      pgd_t *pgd;
-+      int i;
-+
-+      if (!static_cpu_has(X86_FEATURE_PTI))
-+              return 0;
-+
-+      /*
-+       * Any given ldt_struct should have map_ldt_struct() called at most
-+       * once.
-+       */
-+      WARN_ON(ldt->slot != -1);
-+
-+      /*
-+       * Did we already have the top level entry allocated?  We can't
-+       * use pgd_none() for this because it doens't do anything on
-+       * 4-level page table kernels.
-+       */
-+      pgd = pgd_offset(mm, LDT_BASE_ADDR);
-+      had_top_level_entry = (pgd->pgd != 0);
-+
-+      is_vmalloc = is_vmalloc_addr(ldt->entries);
-+
-+      for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
-+              unsigned long offset = i << PAGE_SHIFT;
-+              const void *src = (char *)ldt->entries + offset;
-+              unsigned long pfn;
-+              pte_t pte, *ptep;
-+
-+              va = (unsigned long)ldt_slot_va(slot) + offset;
-+              pfn = is_vmalloc ? vmalloc_to_pfn(src) :
-+                      page_to_pfn(virt_to_page(src));
-+              /*
-+               * Treat the PTI LDT range as a *userspace* range.
-+               * get_locked_pte() will allocate all needed pagetables
-+               * and account for them in this mm.
-+               */
-+              ptep = get_locked_pte(mm, va, &ptl);
-+              if (!ptep)
-+                      return -ENOMEM;
-+              pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL & ~_PAGE_GLOBAL));
-+              set_pte_at(mm, va, ptep, pte);
-+              pte_unmap_unlock(ptep, ptl);
-+      }
-+
-+      if (mm->context.ldt) {
-+              /*
-+               * We already had an LDT.  The top-level entry should already
-+               * have been allocated and synchronized with the usermode
-+               * tables.
-+               */
-+              WARN_ON(!had_top_level_entry);
-+              if (static_cpu_has(X86_FEATURE_PTI))
-+                      WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
-+      } else {
-+              /*
-+               * This is the first time we're mapping an LDT for this process.
-+               * Sync the pgd to the usermode tables.
-+               */
-+              WARN_ON(had_top_level_entry);
-+              if (static_cpu_has(X86_FEATURE_PTI)) {
-+                      WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
-+                      set_pgd(kernel_to_user_pgdp(pgd), *pgd);
-+              }
-+      }
-+
-+      va = (unsigned long)ldt_slot_va(slot);
-+      flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
-+
-+      ldt->slot = slot;
-+#endif
-+      return 0;
-+}
-+
-+static void free_ldt_pgtables(struct mm_struct *mm)
-+{
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+      struct mmu_gather tlb;
-+      unsigned long start = LDT_BASE_ADDR;
-+      unsigned long end = start + (1UL << PGDIR_SHIFT);
-+
-+      if (!static_cpu_has(X86_FEATURE_PTI))
-+              return;
-+
-+      tlb_gather_mmu(&tlb, mm, start, end);
-+      free_pgd_range(&tlb, start, end, start, end);
-+      tlb_finish_mmu(&tlb, start, end);
-+#endif
-+}
-+
- /* After calling this, the LDT is immutable. */
- static void finalize_ldt_struct(struct ldt_struct *ldt)
- {
-@@ -155,6 +265,12 @@ int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
-              new_ldt->nr_entries * LDT_ENTRY_SIZE);
-       finalize_ldt_struct(new_ldt);
- 
-+      retval = map_ldt_struct(mm, new_ldt, 0);
-+      if (retval) {
-+              free_ldt_pgtables(mm);
-+              free_ldt_struct(new_ldt);
-+              goto out_unlock;
-+      }
-       mm->context.ldt = new_ldt;
- 
- out_unlock:
-@@ -173,6 +289,11 @@ void destroy_context_ldt(struct mm_struct *mm)
-       mm->context.ldt = NULL;
- }
- 
-+void ldt_arch_exit_mmap(struct mm_struct *mm)
-+{
-+      free_ldt_pgtables(mm);
-+}
-+
- static int read_ldt(void __user *ptr, unsigned long bytecount)
- {
-       struct mm_struct *mm = current->mm;
-@@ -286,6 +407,18 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
-       new_ldt->entries[ldt_info.entry_number] = ldt;
-       finalize_ldt_struct(new_ldt);
- 
-+      /*
-+       * If we are using PTI, map the new LDT into the userspace pagetables.
-+       * If there is already an LDT, use the other slot so that other CPUs
-+       * will continue to use the old LDT until install_ldt() switches
-+       * them over to the new LDT.
-+       */
-+      error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
-+      if (error) {
-+              free_ldt_struct(old_ldt);
-+              goto out_unlock;
-+      }
-+
-       install_ldt(mm, new_ldt);
-       free_ldt_struct(old_ldt);
-       error = 0;
-diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
-index 3b7720404a9f..eed93dd4cb4a 100644
---- a/arch/x86/mm/dump_pagetables.c
-+++ b/arch/x86/mm/dump_pagetables.c
-@@ -52,11 +52,17 @@ enum address_markers_idx {
-       USER_SPACE_NR = 0,
-       KERNEL_SPACE_NR,
-       LOW_KERNEL_NR,
-+#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
-+      LDT_NR,
-+#endif
-       VMALLOC_START_NR,
-       VMEMMAP_START_NR,
- #ifdef CONFIG_KASAN
-       KASAN_SHADOW_START_NR,
-       KASAN_SHADOW_END_NR,
-+#endif
-+#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
-+      LDT_NR,
- #endif
-       CPU_ENTRY_AREA_NR,
- #ifdef CONFIG_X86_ESPFIX64
-@@ -81,6 +87,9 @@ static struct addr_marker address_markers[] = {
- #ifdef CONFIG_KASAN
-       [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
-       [KASAN_SHADOW_END_NR]   = { KASAN_SHADOW_END,   "KASAN shadow end" },
-+#endif
-+#ifdef CONFIG_MODIFY_LDT_SYSCALL
-+      [LDT_NR]                = { LDT_BASE_ADDR,      "LDT remap" },
- #endif
-       [CPU_ENTRY_AREA_NR]     = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
- #ifdef CONFIG_X86_ESPFIX64
--- 
-2.14.2
-
diff --git a/patches/kernel/0208-x86-pti-Map-the-vsyscall-page-if-needed.patch b/patches/kernel/0208-x86-pti-Map-the-vsyscall-page-if-needed.patch

deleted file mode 100644 (file)

index e6e0ecc..0000000
--- a/patches/kernel/0208-x86-pti-Map-the-vsyscall-page-if-needed.patch
+++ /dev/null
@@ -1,172 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Tue, 12 Dec 2017 07:56:42 -0800
-Subject: [PATCH] x86/pti: Map the vsyscall page if needed
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Make VSYSCALLs work fully in PTI mode by mapping them properly to the user
-space visible page tables.
-
-[ tglx: Hide unused functions (Patch by Arnd Bergmann) ]
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Kees Cook <keescook@chromium.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 85900ea51577e31b186e523c8f4e068c79ecc7d3)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 7a2ba0ea0a18cfc1f18c3f1389ef85f2a0d3227d)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/vsyscall.h       |  1 +
- arch/x86/entry/vsyscall/vsyscall_64.c |  6 ++--
- arch/x86/mm/pti.c                     | 65 +++++++++++++++++++++++++++++++++++
- 3 files changed, 69 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
-index 6ba66ee79710..0eaeb223d692 100644
---- a/arch/x86/include/asm/vsyscall.h
-+++ b/arch/x86/include/asm/vsyscall.h
-@@ -6,6 +6,7 @@
- 
- #ifdef CONFIG_X86_VSYSCALL_EMULATION
- extern void map_vsyscall(void);
-+extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
- 
- /*
-  * Called on instruction fetch fault in vsyscall page.
-diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
-index 5e56a4ced848..238b4bcd3c47 100644
---- a/arch/x86/entry/vsyscall/vsyscall_64.c
-+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
-@@ -343,14 +343,14 @@ int in_gate_area_no_mm(unsigned long addr)
-  * vsyscalls but leave the page not present.  If so, we skip calling
-  * this.
-  */
--static void __init set_vsyscall_pgtable_user_bits(void)
-+void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
- {
-       pgd_t *pgd;
-       p4d_t *p4d;
-       pud_t *pud;
-       pmd_t *pmd;
- 
--      pgd = pgd_offset_k(VSYSCALL_ADDR);
-+      pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
-       set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
-       p4d = p4d_offset(pgd, VSYSCALL_ADDR);
- #if CONFIG_PGTABLE_LEVELS >= 5
-@@ -372,7 +372,7 @@ void __init map_vsyscall(void)
-                            vsyscall_mode == NATIVE
-                            ? PAGE_KERNEL_VSYSCALL
-                            : PAGE_KERNEL_VVAR);
--              set_vsyscall_pgtable_user_bits();
-+              set_vsyscall_pgtable_user_bits(swapper_pg_dir);
-       }
- 
-       BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
-diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
-index b1c38ef9fbbb..bce8aea65606 100644
---- a/arch/x86/mm/pti.c
-+++ b/arch/x86/mm/pti.c
-@@ -38,6 +38,7 @@
- 
- #include <asm/cpufeature.h>
- #include <asm/hypervisor.h>
-+#include <asm/vsyscall.h>
- #include <asm/cmdline.h>
- #include <asm/pti.h>
- #include <asm/pgtable.h>
-@@ -223,6 +224,69 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
-       return pmd_offset(pud, address);
- }
- 
-+#ifdef CONFIG_X86_VSYSCALL_EMULATION
-+/*
-+ * Walk the shadow copy of the page tables (optionally) trying to allocate
-+ * page table pages on the way down.  Does not support large pages.
-+ *
-+ * Note: this is only used when mapping *new* kernel data into the
-+ * user/shadow page tables.  It is never used for userspace data.
-+ *
-+ * Returns a pointer to a PTE on success, or NULL on failure.
-+ */
-+static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
-+{
-+      gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
-+      pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
-+      pte_t *pte;
-+
-+      /* We can't do anything sensible if we hit a large mapping. */
-+      if (pmd_large(*pmd)) {
-+              WARN_ON(1);
-+              return NULL;
-+      }
-+
-+      if (pmd_none(*pmd)) {
-+              unsigned long new_pte_page = __get_free_page(gfp);
-+              if (!new_pte_page)
-+                      return NULL;
-+
-+              if (pmd_none(*pmd)) {
-+                      set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
-+                      new_pte_page = 0;
-+              }
-+              if (new_pte_page)
-+                      free_page(new_pte_page);
-+      }
-+
-+      pte = pte_offset_kernel(pmd, address);
-+      if (pte_flags(*pte) & _PAGE_USER) {
-+              WARN_ONCE(1, "attempt to walk to user pte\n");
-+              return NULL;
-+      }
-+      return pte;
-+}
-+
-+static void __init pti_setup_vsyscall(void)
-+{
-+      pte_t *pte, *target_pte;
-+      unsigned int level;
-+
-+      pte = lookup_address(VSYSCALL_ADDR, &level);
-+      if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
-+              return;
-+
-+      target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
-+      if (WARN_ON(!target_pte))
-+              return;
-+
-+      *target_pte = *pte;
-+      set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
-+}
-+#else
-+static void __init pti_setup_vsyscall(void) { }
-+#endif
-+
- static void __init
- pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
- {
-@@ -319,4 +383,5 @@ void __init pti_init(void)
-       pti_clone_user_shared();
-       pti_clone_entry_text();
-       pti_setup_espfix64();
-+      pti_setup_vsyscall();
- }
--- 
-2.14.2
-
diff --git a/patches/kernel/0208-x86-pti-Put-the-LDT-in-its-own-PGD-if-PTI-is-on.patch b/patches/kernel/0208-x86-pti-Put-the-LDT-in-its-own-PGD-if-PTI-is-on.patch

new file mode 100644 (file)

index 0000000..74b842f
--- /dev/null
+++ b/patches/kernel/0208-x86-pti-Put-the-LDT-in-its-own-PGD-if-PTI-is-on.patch
@@ -0,0 +1,466 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 12 Dec 2017 07:56:45 -0800
+Subject: [PATCH] x86/pti: Put the LDT in its own PGD if PTI is on
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+With PTI enabled, the LDT must be mapped in the usermode tables somewhere.
+The LDT is per process, i.e. per mm.
+
+An earlier approach mapped the LDT on context switch into a fixmap area,
+but that's a big overhead and exhausted the fixmap space when NR_CPUS got
+big.
+
+Take advantage of the fact that there is an address space hole which
+provides a completely unused pgd. Use this pgd to manage per-mm LDT
+mappings.
+
+This has a down side: the LDT isn't (currently) randomized, and an attack
+that can write the LDT is instant root due to call gates (thanks, AMD, for
+leaving call gates in AMD64 but designing them wrong so they're only useful
+for exploits).  This can be mitigated by making the LDT read-only or
+randomizing the mapping, either of which is straightforward on top of this
+patch.
+
+This will significantly slow down LDT users, but that shouldn't matter for
+important workloads -- the LDT is only used by DOSEMU(2), Wine, and very
+old libc implementations.
+
+[ tglx: Cleaned it up. ]
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Kirill A. Shutemov <kirill@shutemov.name>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit f55f0501cbf65ec41cca5058513031b711730b1d)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit c250643846b45ea6782fb0cfcc15e8cd34744bc7)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/x86/x86_64/mm.txt         |   3 +-
+ arch/x86/include/asm/mmu_context.h      |  59 ++++++++++++--
+ arch/x86/include/asm/pgtable_64_types.h |   4 +
+ arch/x86/include/asm/processor.h        |  23 ++++--
+ arch/x86/kernel/ldt.c                   | 139 +++++++++++++++++++++++++++++++-
+ arch/x86/mm/dump_pagetables.c           |   9 +++
+ 6 files changed, 220 insertions(+), 17 deletions(-)
+
+diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
+index 496a1dbf139d..ad41b3813f0a 100644
+--- a/Documentation/x86/x86_64/mm.txt
++++ b/Documentation/x86/x86_64/mm.txt
+@@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
+ ... unused hole ...
+ ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
+ ... unused hole ...
++fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI
+ fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+ ... unused hole ...
+@@ -29,7 +30,7 @@ Virtual memory map with 5 level page tables:
+ hole caused by [56:63] sign extension
+ ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
+ ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
+-ff90000000000000 - ff9fffffffffffff (=52 bits) hole
++ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI
+ ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
+ ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
+ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index 89a01ad7e370..9e3546e1c0f4 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -49,10 +49,33 @@ struct ldt_struct {
+        * call gates.  On native, we could merge the ldt_struct and LDT
+        * allocations, but it's not worth trying to optimize.
+        */
+-      struct desc_struct *entries;
+-      unsigned int nr_entries;
++      struct desc_struct      *entries;
++      unsigned int            nr_entries;
++
++      /*
++       * If PTI is in use, then the entries array is not mapped while we're
++       * in user mode.  The whole array will be aliased at the address
++       * given by ldt_slot_va(slot).  We use two slots so that we can allocate
++       * and map, and enable a new LDT without invalidating the mapping
++       * of an older, still-in-use LDT.
++       *
++       * slot will be -1 if this LDT doesn't have an alias mapping.
++       */
++      int                     slot;
+ };
+ 
++/* This is a multiple of PAGE_SIZE. */
++#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
++
++static inline void *ldt_slot_va(int slot)
++{
++#ifdef CONFIG_X86_64
++      return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
++#else
++      BUG();
++#endif
++}
++
+ /*
+  * Used for LDT copy/destruction.
+  */
+@@ -63,6 +86,7 @@ static inline void init_new_context_ldt(struct mm_struct *mm)
+ }
+ int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
+ void destroy_context_ldt(struct mm_struct *mm);
++void ldt_arch_exit_mmap(struct mm_struct *mm);
+ #else /* CONFIG_MODIFY_LDT_SYSCALL */
+ static inline void init_new_context_ldt(struct mm_struct *mm) { }
+ static inline int ldt_dup_context(struct mm_struct *oldmm,
+@@ -70,7 +94,8 @@ static inline int ldt_dup_context(struct mm_struct *oldmm,
+ {
+       return 0;
+ }
+-static inline void destroy_context_ldt(struct mm_struct *mm) {}
++static inline void destroy_context_ldt(struct mm_struct *mm) { }
++static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
+ #endif
+ 
+ static inline void load_mm_ldt(struct mm_struct *mm)
+@@ -95,10 +120,31 @@ static inline void load_mm_ldt(struct mm_struct *mm)
+        * that we can see.
+        */
+ 
+-      if (unlikely(ldt))
+-              set_ldt(ldt->entries, ldt->nr_entries);
+-      else
++      if (unlikely(ldt)) {
++              if (static_cpu_has(X86_FEATURE_PTI)) {
++                      if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
++                              /*
++                               * Whoops -- either the new LDT isn't mapped
++                               * (if slot == -1) or is mapped into a bogus
++                               * slot (if slot > 1).
++                               */
++                              clear_LDT();
++                              return;
++                      }
++
++                      /*
++                       * If page table isolation is enabled, ldt->entries
++                       * will not be mapped in the userspace pagetables.
++                       * Tell the CPU to access the LDT through the alias
++                       * at ldt_slot_va(ldt->slot).
++                       */
++                      set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
++              } else {
++                      set_ldt(ldt->entries, ldt->nr_entries);
++              }
++      } else {
+               clear_LDT();
++      }
+ #else
+       clear_LDT();
+ #endif
+@@ -193,6 +239,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+ static inline void arch_exit_mmap(struct mm_struct *mm)
+ {
+       paravirt_arch_exit_mmap(mm);
++      ldt_arch_exit_mmap(mm);
+ }
+ 
+ #ifdef CONFIG_X86_64
+diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
+index 5932dead34ee..e8a809ee0bb6 100644
+--- a/arch/x86/include/asm/pgtable_64_types.h
++++ b/arch/x86/include/asm/pgtable_64_types.h
+@@ -81,10 +81,14 @@ typedef struct { pteval_t pte; } pte_t;
+ # define VMALLOC_SIZE_TB      _AC(12800, UL)
+ # define __VMALLOC_BASE               _AC(0xffa0000000000000, UL)
+ # define __VMEMMAP_BASE               _AC(0xffd4000000000000, UL)
++# define LDT_PGD_ENTRY                _AC(-112, UL)
++# define LDT_BASE_ADDR                (LDT_PGD_ENTRY << PGDIR_SHIFT)
+ #else
+ # define VMALLOC_SIZE_TB      _AC(32, UL)
+ # define __VMALLOC_BASE               _AC(0xffffc90000000000, UL)
+ # define __VMEMMAP_BASE               _AC(0xffffea0000000000, UL)
++# define LDT_PGD_ENTRY                _AC(-4, UL)
++# define LDT_BASE_ADDR                (LDT_PGD_ENTRY << PGDIR_SHIFT)
+ #endif
+ 
+ #ifdef CONFIG_RANDOMIZE_MEMORY
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 935d68609922..24503521c947 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -843,13 +843,22 @@ static inline void spin_lock_prefetch(const void *x)
+ 
+ #else
+ /*
+- * User space process size. 47bits minus one guard page.  The guard
+- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
+- * the highest possible canonical userspace address, then that
+- * syscall will enter the kernel with a non-canonical return
+- * address, and SYSRET will explode dangerously.  We avoid this
+- * particular problem by preventing anything from being mapped
+- * at the maximum canonical address.
++ * User space process size.  This is the first address outside the user range.
++ * There are a few constraints that determine this:
++ *
++ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
++ * address, then that syscall will enter the kernel with a
++ * non-canonical return address, and SYSRET will explode dangerously.
++ * We avoid this particular problem by preventing anything executable
++ * from being mapped at the maximum canonical address.
++ *
++ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
++ * CPUs malfunction if they execute code from the highest canonical page.
++ * They'll speculate right off the end of the canonical space, and
++ * bad things happen.  This is worked around in the same way as the
++ * Intel problem.
++ *
++ * With page table isolation enabled, we map the LDT in ... [stay tuned]
+  */
+ #define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
+ 
+diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
+index 74a5aaf13f3c..eceaada581ff 100644
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -23,6 +23,7 @@
+ #include <linux/uaccess.h>
+ 
+ #include <asm/ldt.h>
++#include <asm/tlb.h>
+ #include <asm/desc.h>
+ #include <asm/mmu_context.h>
+ #include <asm/syscalls.h>
+@@ -50,13 +51,11 @@ static void refresh_ldt_segments(void)
+ static void flush_ldt(void *__mm)
+ {
+       struct mm_struct *mm = __mm;
+-      mm_context_t *pc;
+ 
+       if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
+               return;
+ 
+-      pc = &mm->context;
+-      set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
++      load_mm_ldt(mm);
+ 
+       refresh_ldt_segments();
+ }
+@@ -93,10 +92,121 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
+               return NULL;
+       }
+ 
++      /* The new LDT isn't aliased for PTI yet. */
++      new_ldt->slot = -1;
++
+       new_ldt->nr_entries = num_entries;
+       return new_ldt;
+ }
+ 
++/*
++ * If PTI is enabled, this maps the LDT into the kernelmode and
++ * usermode tables for the given mm.
++ *
++ * There is no corresponding unmap function.  Even if the LDT is freed, we
++ * leave the PTEs around until the slot is reused or the mm is destroyed.
++ * This is harmless: the LDT is always in ordinary memory, and no one will
++ * access the freed slot.
++ *
++ * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
++ * it useful, and the flush would slow down modify_ldt().
++ */
++static int
++map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
++{
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++      bool is_vmalloc, had_top_level_entry;
++      unsigned long va;
++      spinlock_t *ptl;
++      pgd_t *pgd;
++      int i;
++
++      if (!static_cpu_has(X86_FEATURE_PTI))
++              return 0;
++
++      /*
++       * Any given ldt_struct should have map_ldt_struct() called at most
++       * once.
++       */
++      WARN_ON(ldt->slot != -1);
++
++      /*
++       * Did we already have the top level entry allocated?  We can't
++       * use pgd_none() for this because it doesn't do anything on
++       * 4-level page table kernels.
++       */
++      pgd = pgd_offset(mm, LDT_BASE_ADDR);
++      had_top_level_entry = (pgd->pgd != 0);
++
++      is_vmalloc = is_vmalloc_addr(ldt->entries);
++
++      for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
++              unsigned long offset = i << PAGE_SHIFT;
++              const void *src = (char *)ldt->entries + offset;
++              unsigned long pfn;
++              pte_t pte, *ptep;
++
++              va = (unsigned long)ldt_slot_va(slot) + offset;
++              pfn = is_vmalloc ? vmalloc_to_pfn(src) :
++                      page_to_pfn(virt_to_page(src));
++              /*
++               * Treat the PTI LDT range as a *userspace* range.
++               * get_locked_pte() will allocate all needed pagetables
++               * and account for them in this mm.
++               */
++              ptep = get_locked_pte(mm, va, &ptl);
++              if (!ptep)
++                      return -ENOMEM;
++              pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL & ~_PAGE_GLOBAL));
++              set_pte_at(mm, va, ptep, pte);
++              pte_unmap_unlock(ptep, ptl);
++      }
++
++      if (mm->context.ldt) {
++              /*
++               * We already had an LDT.  The top-level entry should already
++               * have been allocated and synchronized with the usermode
++               * tables.
++               */
++              WARN_ON(!had_top_level_entry);
++              if (static_cpu_has(X86_FEATURE_PTI))
++                      WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
++      } else {
++              /*
++               * This is the first time we're mapping an LDT for this process.
++               * Sync the pgd to the usermode tables.
++               */
++              WARN_ON(had_top_level_entry);
++              if (static_cpu_has(X86_FEATURE_PTI)) {
++                      WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
++                      set_pgd(kernel_to_user_pgdp(pgd), *pgd);
++              }
++      }
++
++      va = (unsigned long)ldt_slot_va(slot);
++      flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
++
++      ldt->slot = slot;
++#endif
++      return 0;
++}
++
++static void free_ldt_pgtables(struct mm_struct *mm)
++{
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++      struct mmu_gather tlb;
++      unsigned long start = LDT_BASE_ADDR;
++      unsigned long end = start + (1UL << PGDIR_SHIFT);
++
++      if (!static_cpu_has(X86_FEATURE_PTI))
++              return;
++
++      tlb_gather_mmu(&tlb, mm, start, end);
++      free_pgd_range(&tlb, start, end, start, end);
++      tlb_finish_mmu(&tlb, start, end);
++#endif
++}
++
+ /* After calling this, the LDT is immutable. */
+ static void finalize_ldt_struct(struct ldt_struct *ldt)
+ {
+@@ -155,6 +265,12 @@ int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
+              new_ldt->nr_entries * LDT_ENTRY_SIZE);
+       finalize_ldt_struct(new_ldt);
+ 
++      retval = map_ldt_struct(mm, new_ldt, 0);
++      if (retval) {
++              free_ldt_pgtables(mm);
++              free_ldt_struct(new_ldt);
++              goto out_unlock;
++      }
+       mm->context.ldt = new_ldt;
+ 
+ out_unlock:
+@@ -173,6 +289,11 @@ void destroy_context_ldt(struct mm_struct *mm)
+       mm->context.ldt = NULL;
+ }
+ 
++void ldt_arch_exit_mmap(struct mm_struct *mm)
++{
++      free_ldt_pgtables(mm);
++}
++
+ static int read_ldt(void __user *ptr, unsigned long bytecount)
+ {
+       struct mm_struct *mm = current->mm;
+@@ -286,6 +407,18 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
+       new_ldt->entries[ldt_info.entry_number] = ldt;
+       finalize_ldt_struct(new_ldt);
+ 
++      /*
++       * If we are using PTI, map the new LDT into the userspace pagetables.
++       * If there is already an LDT, use the other slot so that other CPUs
++       * will continue to use the old LDT until install_ldt() switches
++       * them over to the new LDT.
++       */
++      error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
++      if (error) {
++              free_ldt_struct(old_ldt);
++              goto out_unlock;
++      }
++
+       install_ldt(mm, new_ldt);
+       free_ldt_struct(old_ldt);
+       error = 0;
+diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
+index 3b7720404a9f..eed93dd4cb4a 100644
+--- a/arch/x86/mm/dump_pagetables.c
++++ b/arch/x86/mm/dump_pagetables.c
+@@ -52,11 +52,17 @@ enum address_markers_idx {
+       USER_SPACE_NR = 0,
+       KERNEL_SPACE_NR,
+       LOW_KERNEL_NR,
++#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
++      LDT_NR,
++#endif
+       VMALLOC_START_NR,
+       VMEMMAP_START_NR,
+ #ifdef CONFIG_KASAN
+       KASAN_SHADOW_START_NR,
+       KASAN_SHADOW_END_NR,
++#endif
++#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
++      LDT_NR,
+ #endif
+       CPU_ENTRY_AREA_NR,
+ #ifdef CONFIG_X86_ESPFIX64
+@@ -81,6 +87,9 @@ static struct addr_marker address_markers[] = {
+ #ifdef CONFIG_KASAN
+       [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
+       [KASAN_SHADOW_END_NR]   = { KASAN_SHADOW_END,   "KASAN shadow end" },
++#endif
++#ifdef CONFIG_MODIFY_LDT_SYSCALL
++      [LDT_NR]                = { LDT_BASE_ADDR,      "LDT remap" },
+ #endif
+       [CPU_ENTRY_AREA_NR]     = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
+ #ifdef CONFIG_X86_ESPFIX64
+-- 
+2.14.2
+
diff --git a/patches/kernel/0209-x86-mm-Allow-flushing-for-future-ASID-switches.patch b/patches/kernel/0209-x86-mm-Allow-flushing-for-future-ASID-switches.patch

deleted file mode 100644 (file)

index 2314bde..0000000
--- a/patches/kernel/0209-x86-mm-Allow-flushing-for-future-ASID-switches.patch
+++ /dev/null
@@ -1,192 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Mon, 4 Dec 2017 15:07:57 +0100
-Subject: [PATCH] x86/mm: Allow flushing for future ASID switches
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-If changing the page tables in such a way that an invalidation of all
-contexts (aka. PCIDs / ASIDs) is required, they can be actively invalidated
-by:
-
- 1. INVPCID for each PCID (works for single pages too).
-
- 2. Load CR3 with each PCID without the NOFLUSH bit set
-
- 3. Load CR3 with the NOFLUSH bit set for each and do INVLPG for each address.
-
-But, none of these are really feasible since there are ~6 ASIDs (12 with
-PAGE_TABLE_ISOLATION) at the time that invalidation is required.
-Instead of actively invalidating them, invalidate the *current* context and
-also mark the cpu_tlbstate _quickly_ to indicate future invalidation to be
-required.
-
-At the next context-switch, look for this indicator
-('invalidate_other' being set) invalidate all of the
-cpu_tlbstate.ctxs[] entries.
-
-This ensures that any future context switches will do a full flush
-of the TLB, picking up the previous changes.
-
-[ tglx: Folded more fixups from Peter ]
-
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 2ea907c4fe7b78e5840c1dc07800eae93248cad1)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit fbb7e6e9e7e7cedecc164d660d08563f88103b56)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/tlbflush.h | 37 +++++++++++++++++++++++++++++--------
- arch/x86/mm/tlb.c               | 35 +++++++++++++++++++++++++++++++++++
- 2 files changed, 64 insertions(+), 8 deletions(-)
-
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index 503f87c30c15..3769ce182eac 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -124,6 +124,17 @@ struct tlb_state {
-        */
-       bool is_lazy;
- 
-+      /*
-+       * If set we changed the page tables in such a way that we
-+       * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
-+       * This tells us to go invalidate all the non-loaded ctxs[]
-+       * on the next context switch.
-+       *
-+       * The current ctx was kept up-to-date as it ran and does not
-+       * need to be invalidated.
-+       */
-+      bool invalidate_other;
-+
-       /*
-        * Access to this CR4 shadow and to H/W CR4 is protected by
-        * disabling interrupts when modifying either one.
-@@ -201,6 +212,14 @@ static inline unsigned long cr4_read_shadow(void)
-       return this_cpu_read(cpu_tlbstate.cr4);
- }
- 
-+/*
-+ * Mark all other ASIDs as invalid, preserves the current.
-+ */
-+static inline void invalidate_other_asid(void)
-+{
-+      this_cpu_write(cpu_tlbstate.invalidate_other, true);
-+}
-+
- /*
-  * Save some of cr4 feature set we're using (e.g.  Pentium 4MB
-  * enable and PPro Global page enable), so that any CPU's that boot
-@@ -287,14 +306,6 @@ static inline void __flush_tlb_all(void)
-                */
-               __flush_tlb();
-       }
--
--      /*
--       * Note: if we somehow had PCID but not PGE, then this wouldn't work --
--       * we'd end up flushing kernel translations for the current ASID but
--       * we might fail to flush kernel translations for other cached ASIDs.
--       *
--       * To avoid this issue, we force PCID off if PGE is off.
--       */
- }
- 
- /*
-@@ -304,6 +315,16 @@ static inline void __flush_tlb_one(unsigned long addr)
- {
-       count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
-       __flush_tlb_single(addr);
-+
-+      if (!static_cpu_has(X86_FEATURE_PTI))
-+              return;
-+
-+      /*
-+       * __flush_tlb_single() will have cleared the TLB entry for this ASID,
-+       * but since kernel space is replicated across all, we must also
-+       * invalidate all others.
-+       */
-+      invalidate_other_asid();
- }
- 
- #define TLB_FLUSH_ALL -1UL
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index 87d4f961bcb4..ce87b69fb4e0 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -28,6 +28,38 @@
-  *    Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
-  */
- 
-+/*
-+ * We get here when we do something requiring a TLB invalidation
-+ * but could not go invalidate all of the contexts.  We do the
-+ * necessary invalidation by clearing out the 'ctx_id' which
-+ * forces a TLB flush when the context is loaded.
-+ */
-+void clear_asid_other(void)
-+{
-+      u16 asid;
-+
-+      /*
-+       * This is only expected to be set if we have disabled
-+       * kernel _PAGE_GLOBAL pages.
-+       */
-+      if (!static_cpu_has(X86_FEATURE_PTI)) {
-+              WARN_ON_ONCE(1);
-+              return;
-+      }
-+
-+      for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
-+              /* Do not need to flush the current asid */
-+              if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
-+                      continue;
-+              /*
-+               * Make sure the next time we go to switch to
-+               * this asid, we do a flush:
-+               */
-+              this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
-+      }
-+      this_cpu_write(cpu_tlbstate.invalidate_other, false);
-+}
-+
- atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
- 
- DEFINE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
-@@ -43,6 +75,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
-               return;
-       }
- 
-+      if (this_cpu_read(cpu_tlbstate.invalidate_other))
-+              clear_asid_other();
-+
-       for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
-               if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
-                   next->context.ctx_id)
--- 
-2.14.2
-
diff --git a/patches/kernel/0209-x86-pti-Map-the-vsyscall-page-if-needed.patch b/patches/kernel/0209-x86-pti-Map-the-vsyscall-page-if-needed.patch

new file mode 100644 (file)

index 0000000..e6e0ecc
--- /dev/null
+++ b/patches/kernel/0209-x86-pti-Map-the-vsyscall-page-if-needed.patch
@@ -0,0 +1,172 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 12 Dec 2017 07:56:42 -0800
+Subject: [PATCH] x86/pti: Map the vsyscall page if needed
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Make VSYSCALLs work fully in PTI mode by mapping them properly to the user
+space visible page tables.
+
+[ tglx: Hide unused functions (Patch by Arnd Bergmann) ]
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 85900ea51577e31b186e523c8f4e068c79ecc7d3)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 7a2ba0ea0a18cfc1f18c3f1389ef85f2a0d3227d)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/vsyscall.h       |  1 +
+ arch/x86/entry/vsyscall/vsyscall_64.c |  6 ++--
+ arch/x86/mm/pti.c                     | 65 +++++++++++++++++++++++++++++++++++
+ 3 files changed, 69 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
+index 6ba66ee79710..0eaeb223d692 100644
+--- a/arch/x86/include/asm/vsyscall.h
++++ b/arch/x86/include/asm/vsyscall.h
+@@ -6,6 +6,7 @@
+ 
+ #ifdef CONFIG_X86_VSYSCALL_EMULATION
+ extern void map_vsyscall(void);
++extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
+ 
+ /*
+  * Called on instruction fetch fault in vsyscall page.
+diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
+index 5e56a4ced848..238b4bcd3c47 100644
+--- a/arch/x86/entry/vsyscall/vsyscall_64.c
++++ b/arch/x86/entry/vsyscall/vsyscall_64.c
+@@ -343,14 +343,14 @@ int in_gate_area_no_mm(unsigned long addr)
+  * vsyscalls but leave the page not present.  If so, we skip calling
+  * this.
+  */
+-static void __init set_vsyscall_pgtable_user_bits(void)
++void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
+ {
+       pgd_t *pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+ 
+-      pgd = pgd_offset_k(VSYSCALL_ADDR);
++      pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
+       set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+       p4d = p4d_offset(pgd, VSYSCALL_ADDR);
+ #if CONFIG_PGTABLE_LEVELS >= 5
+@@ -372,7 +372,7 @@ void __init map_vsyscall(void)
+                            vsyscall_mode == NATIVE
+                            ? PAGE_KERNEL_VSYSCALL
+                            : PAGE_KERNEL_VVAR);
+-              set_vsyscall_pgtable_user_bits();
++              set_vsyscall_pgtable_user_bits(swapper_pg_dir);
+       }
+ 
+       BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
+diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
+index b1c38ef9fbbb..bce8aea65606 100644
+--- a/arch/x86/mm/pti.c
++++ b/arch/x86/mm/pti.c
+@@ -38,6 +38,7 @@
+ 
+ #include <asm/cpufeature.h>
+ #include <asm/hypervisor.h>
++#include <asm/vsyscall.h>
+ #include <asm/cmdline.h>
+ #include <asm/pti.h>
+ #include <asm/pgtable.h>
+@@ -223,6 +224,69 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+       return pmd_offset(pud, address);
+ }
+ 
++#ifdef CONFIG_X86_VSYSCALL_EMULATION
++/*
++ * Walk the shadow copy of the page tables (optionally) trying to allocate
++ * page table pages on the way down.  Does not support large pages.
++ *
++ * Note: this is only used when mapping *new* kernel data into the
++ * user/shadow page tables.  It is never used for userspace data.
++ *
++ * Returns a pointer to a PTE on success, or NULL on failure.
++ */
++static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
++{
++      gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
++      pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
++      pte_t *pte;
++
++      /* We can't do anything sensible if we hit a large mapping. */
++      if (pmd_large(*pmd)) {
++              WARN_ON(1);
++              return NULL;
++      }
++
++      if (pmd_none(*pmd)) {
++              unsigned long new_pte_page = __get_free_page(gfp);
++              if (!new_pte_page)
++                      return NULL;
++
++              if (pmd_none(*pmd)) {
++                      set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
++                      new_pte_page = 0;
++              }
++              if (new_pte_page)
++                      free_page(new_pte_page);
++      }
++
++      pte = pte_offset_kernel(pmd, address);
++      if (pte_flags(*pte) & _PAGE_USER) {
++              WARN_ONCE(1, "attempt to walk to user pte\n");
++              return NULL;
++      }
++      return pte;
++}
++
++static void __init pti_setup_vsyscall(void)
++{
++      pte_t *pte, *target_pte;
++      unsigned int level;
++
++      pte = lookup_address(VSYSCALL_ADDR, &level);
++      if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
++              return;
++
++      target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
++      if (WARN_ON(!target_pte))
++              return;
++
++      *target_pte = *pte;
++      set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
++}
++#else
++static void __init pti_setup_vsyscall(void) { }
++#endif
++
+ static void __init
+ pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
+ {
+@@ -319,4 +383,5 @@ void __init pti_init(void)
+       pti_clone_user_shared();
+       pti_clone_entry_text();
+       pti_setup_espfix64();
++      pti_setup_vsyscall();
+ }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0210-x86-mm-Abstract-switching-CR3.patch b/patches/kernel/0210-x86-mm-Abstract-switching-CR3.patch

deleted file mode 100644 (file)

index 081d23a..0000000
--- a/patches/kernel/0210-x86-mm-Abstract-switching-CR3.patch
+++ /dev/null
@@ -1,96 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Mon, 4 Dec 2017 15:07:58 +0100
-Subject: [PATCH] x86/mm: Abstract switching CR3
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-In preparation to adding additional PCID flushing, abstract the
-loading of a new ASID into CR3.
-
-[ PeterZ: Split out from big combo patch ]
-
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 48e111982cda033fec832c6b0592c2acedd85d04)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 1e2affe2a79305b3a5f3ad65d3f61ad9d1f9e168)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/tlb.c | 22 ++++++++++++++++++++--
- 1 file changed, 20 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index ce87b69fb4e0..353f2f4e1d96 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -101,6 +101,24 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
-       *need_flush = true;
- }
- 
-+static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
-+{
-+      unsigned long new_mm_cr3;
-+
-+      if (need_flush) {
-+              new_mm_cr3 = build_cr3(pgdir, new_asid);
-+      } else {
-+              new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
-+      }
-+
-+      /*
-+       * Caution: many callers of this function expect
-+       * that load_cr3() is serializing and orders TLB
-+       * fills with respect to the mm_cpumask writes.
-+       */
-+      write_cr3(new_mm_cr3);
-+}
-+
- void leave_mm(int cpu)
- {
-       struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
-@@ -228,7 +246,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-               if (need_flush) {
-                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
-                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
--                      write_cr3(build_cr3(next->pgd, new_asid));
-+                      load_new_mm_cr3(next->pgd, new_asid, true);
- 
-                       /*
-                        * NB: This gets called via leave_mm() in the idle path
-@@ -241,7 +259,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-                       trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-               } else {
-                       /* The new ASID is already up to date. */
--                      write_cr3(build_cr3_noflush(next->pgd, new_asid));
-+                      load_new_mm_cr3(next->pgd, new_asid, false);
- 
-                       /* See above wrt _rcuidle. */
-                       trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
--- 
-2.14.2
-
diff --git a/patches/kernel/0210-x86-mm-Allow-flushing-for-future-ASID-switches.patch b/patches/kernel/0210-x86-mm-Allow-flushing-for-future-ASID-switches.patch

new file mode 100644 (file)

index 0000000..2314bde
--- /dev/null
+++ b/patches/kernel/0210-x86-mm-Allow-flushing-for-future-ASID-switches.patch
@@ -0,0 +1,192 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:57 +0100
+Subject: [PATCH] x86/mm: Allow flushing for future ASID switches
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+If changing the page tables in such a way that an invalidation of all
+contexts (aka. PCIDs / ASIDs) is required, they can be actively invalidated
+by:
+
+ 1. INVPCID for each PCID (works for single pages too).
+
+ 2. Load CR3 with each PCID without the NOFLUSH bit set
+
+ 3. Load CR3 with the NOFLUSH bit set for each and do INVLPG for each address.
+
+But, none of these are really feasible since there are ~6 ASIDs (12 with
+PAGE_TABLE_ISOLATION) at the time that invalidation is required.
+Instead of actively invalidating them, invalidate the *current* context and
+also mark the cpu_tlbstate _quickly_ to indicate future invalidation to be
+required.
+
+At the next context-switch, look for this indicator
+('invalidate_other' being set) and invalidate all of the
+cpu_tlbstate.ctxs[] entries.
+
+This ensures that any future context switches will do a full flush
+of the TLB, picking up the previous changes.
+
+[ tglx: Folded more fixups from Peter ]
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 2ea907c4fe7b78e5840c1dc07800eae93248cad1)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit fbb7e6e9e7e7cedecc164d660d08563f88103b56)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/tlbflush.h | 37 +++++++++++++++++++++++++++++--------
+ arch/x86/mm/tlb.c               | 35 +++++++++++++++++++++++++++++++++++
+ 2 files changed, 64 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 503f87c30c15..3769ce182eac 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -124,6 +124,17 @@ struct tlb_state {
+        */
+       bool is_lazy;
+ 
++      /*
++       * If set we changed the page tables in such a way that we
++       * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
++       * This tells us to go invalidate all the non-loaded ctxs[]
++       * on the next context switch.
++       *
++       * The current ctx was kept up-to-date as it ran and does not
++       * need to be invalidated.
++       */
++      bool invalidate_other;
++
+       /*
+        * Access to this CR4 shadow and to H/W CR4 is protected by
+        * disabling interrupts when modifying either one.
+@@ -201,6 +212,14 @@ static inline unsigned long cr4_read_shadow(void)
+       return this_cpu_read(cpu_tlbstate.cr4);
+ }
+ 
++/*
++ * Mark all other ASIDs as invalid, preserves the current.
++ */
++static inline void invalidate_other_asid(void)
++{
++      this_cpu_write(cpu_tlbstate.invalidate_other, true);
++}
++
+ /*
+  * Save some of cr4 feature set we're using (e.g.  Pentium 4MB
+  * enable and PPro Global page enable), so that any CPU's that boot
+@@ -287,14 +306,6 @@ static inline void __flush_tlb_all(void)
+                */
+               __flush_tlb();
+       }
+-
+-      /*
+-       * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+-       * we'd end up flushing kernel translations for the current ASID but
+-       * we might fail to flush kernel translations for other cached ASIDs.
+-       *
+-       * To avoid this issue, we force PCID off if PGE is off.
+-       */
+ }
+ 
+ /*
+@@ -304,6 +315,16 @@ static inline void __flush_tlb_one(unsigned long addr)
+ {
+       count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
+       __flush_tlb_single(addr);
++
++      if (!static_cpu_has(X86_FEATURE_PTI))
++              return;
++
++      /*
++       * __flush_tlb_single() will have cleared the TLB entry for this ASID,
++       * but since kernel space is replicated across all, we must also
++       * invalidate all others.
++       */
++      invalidate_other_asid();
+ }
+ 
+ #define TLB_FLUSH_ALL -1UL
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 87d4f961bcb4..ce87b69fb4e0 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -28,6 +28,38 @@
+  *    Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
+  */
+ 
++/*
++ * We get here when we do something requiring a TLB invalidation
++ * but could not go invalidate all of the contexts.  We do the
++ * necessary invalidation by clearing out the 'ctx_id' which
++ * forces a TLB flush when the context is loaded.
++ */
++void clear_asid_other(void)
++{
++      u16 asid;
++
++      /*
++       * This is only expected to be set if we have disabled
++       * kernel _PAGE_GLOBAL pages.
++       */
++      if (!static_cpu_has(X86_FEATURE_PTI)) {
++              WARN_ON_ONCE(1);
++              return;
++      }
++
++      for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
++              /* Do not need to flush the current asid */
++              if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
++                      continue;
++              /*
++               * Make sure the next time we go to switch to
++               * this asid, we do a flush:
++               */
++              this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
++      }
++      this_cpu_write(cpu_tlbstate.invalidate_other, false);
++}
++
+ atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
+ 
+ DEFINE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
+@@ -43,6 +75,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
+               return;
+       }
+ 
++      if (this_cpu_read(cpu_tlbstate.invalidate_other))
++              clear_asid_other();
++
+       for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
+               if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
+                   next->context.ctx_id)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0211-x86-mm-Abstract-switching-CR3.patch b/patches/kernel/0211-x86-mm-Abstract-switching-CR3.patch

new file mode 100644 (file)

index 0000000..081d23a
--- /dev/null
+++ b/patches/kernel/0211-x86-mm-Abstract-switching-CR3.patch
@@ -0,0 +1,96 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:07:58 +0100
+Subject: [PATCH] x86/mm: Abstract switching CR3
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+In preparation for adding additional PCID flushing, abstract the
+loading of a new ASID into CR3.
+
+[ PeterZ: Split out from big combo patch ]
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 48e111982cda033fec832c6b0592c2acedd85d04)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 1e2affe2a79305b3a5f3ad65d3f61ad9d1f9e168)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/tlb.c | 22 ++++++++++++++++++++--
+ 1 file changed, 20 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index ce87b69fb4e0..353f2f4e1d96 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -101,6 +101,24 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
+       *need_flush = true;
+ }
+ 
++static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
++{
++      unsigned long new_mm_cr3;
++
++      if (need_flush) {
++              new_mm_cr3 = build_cr3(pgdir, new_asid);
++      } else {
++              new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
++      }
++
++      /*
++       * Caution: many callers of this function expect
++       * that load_cr3() is serializing and orders TLB
++       * fills with respect to the mm_cpumask writes.
++       */
++      write_cr3(new_mm_cr3);
++}
++
+ void leave_mm(int cpu)
+ {
+       struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+@@ -228,7 +246,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+               if (need_flush) {
+                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
+                       this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
+-                      write_cr3(build_cr3(next->pgd, new_asid));
++                      load_new_mm_cr3(next->pgd, new_asid, true);
+ 
+                       /*
+                        * NB: This gets called via leave_mm() in the idle path
+@@ -241,7 +259,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+                       trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+               } else {
+                       /* The new ASID is already up to date. */
+-                      write_cr3(build_cr3_noflush(next->pgd, new_asid));
++                      load_new_mm_cr3(next->pgd, new_asid, false);
+ 
+                       /* See above wrt _rcuidle. */
+                       trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0211-x86-mm-Use-Fix-PCID-to-optimize-user-kernel-switches.patch b/patches/kernel/0211-x86-mm-Use-Fix-PCID-to-optimize-user-kernel-switches.patch

deleted file mode 100644 (file)

index 5e21faf..0000000
--- a/patches/kernel/0211-x86-mm-Use-Fix-PCID-to-optimize-user-kernel-switches.patch
+++ /dev/null
@@ -1,497 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Mon, 4 Dec 2017 15:07:59 +0100
-Subject: [PATCH] x86/mm: Use/Fix PCID to optimize user/kernel switches
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-We can use PCID to retain the TLBs across CR3 switches; including those now
-part of the user/kernel switch. This increases performance of kernel
-entry/exit at the cost of more expensive/complicated TLB flushing.
-
-Now that we have two address spaces, one for kernel and one for user space,
-we need two PCIDs per mm. We use the top PCID bit to indicate a user PCID
-(just like we use the PFN LSB for the PGD). Since we do TLB invalidation
-from kernel space, the existing code will only invalidate the kernel PCID,
-we augment that by marking the corresponding user PCID invalid, and upon
-switching back to userspace, use a flushing CR3 write for the switch.
-
-In order to access the user_pcid_flush_mask we use PER_CPU storage, which
-means the previously established SWAPGS vs CR3 ordering is now mandatory
-and required.
-
-Having to do this memory access does require additional registers, most
-sites have a functioning stack and we can spill one (RAX), sites without
-functional stack need to otherwise provide the second scratch register.
-
-Note: PCID is generally available on Intel Sandybridge and later CPUs.
-Note: Up until this point TLB flushing was broken in this series.
-
-Based-on-code-from: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(backported from commit 6fd166aae78c0ab738d49bda653cbd9e3b1491cf)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit ac7471365d49c0a91d4b63453eb848cc19f17589)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/calling.h                    | 72 ++++++++++++++++++-----
- arch/x86/include/asm/processor-flags.h      |  5 ++
- arch/x86/include/asm/tlbflush.h             | 91 +++++++++++++++++++++++++----
- arch/x86/include/uapi/asm/processor-flags.h |  7 ++-
- arch/x86/kernel/asm-offsets.c               |  4 ++
- arch/x86/mm/init.c                          |  2 +-
- arch/x86/mm/tlb.c                           |  1 +
- arch/x86/entry/entry_64.S                   |  9 +--
- arch/x86/entry/entry_64_compat.S            |  4 +-
- 9 files changed, 162 insertions(+), 33 deletions(-)
-
-diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
-index bb56f5346ae8..ce5fb309926d 100644
---- a/arch/x86/entry/calling.h
-+++ b/arch/x86/entry/calling.h
-@@ -2,6 +2,9 @@
- #include <asm/unwind_hints.h>
- #include <asm/cpufeatures.h>
- #include <asm/page_types.h>
-+#include <asm/percpu.h>
-+#include <asm/asm-offsets.h>
-+#include <asm/processor-flags.h>
- 
- /*
- 
-@@ -190,17 +193,21 @@ For 32-bit we have the following conventions - kernel is built with
- 
- #ifdef CONFIG_PAGE_TABLE_ISOLATION
- 
--/* PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two halves: */
--#define PTI_SWITCH_MASK (1<<PAGE_SHIFT)
-+/*
-+ * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
-+ * halves:
-+ */
-+#define PTI_SWITCH_PGTABLES_MASK      (1<<PAGE_SHIFT)
-+#define PTI_SWITCH_MASK               (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
- 
--.macro ADJUST_KERNEL_CR3 reg:req
--      /* Clear "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
--      andq    $(~PTI_SWITCH_MASK), \reg
-+.macro SET_NOFLUSH_BIT        reg:req
-+      bts     $X86_CR3_PCID_NOFLUSH_BIT, \reg
- .endm
- 
--.macro ADJUST_USER_CR3 reg:req
--      /* Move CR3 up a page to the user page tables: */
--      orq     $(PTI_SWITCH_MASK), \reg
-+.macro ADJUST_KERNEL_CR3 reg:req
-+      ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
-+      /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
-+      andq    $(~PTI_SWITCH_MASK), \reg
- .endm
- 
- .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
-@@ -211,21 +218,58 @@ For 32-bit we have the following conventions - kernel is built with
- .Lend_\@:
- .endm
- 
--.macro SWITCH_TO_USER_CR3 scratch_reg:req
-+#define THIS_CPU_user_pcid_flush_mask   \
-+      PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
-+
-+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
-       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
-       mov     %cr3, \scratch_reg
--      ADJUST_USER_CR3 \scratch_reg
-+
-+      ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
-+
-+      /*
-+       * Test if the ASID needs a flush.
-+       */
-+      movq    \scratch_reg, \scratch_reg2
-+      andq    $(0x7FF), \scratch_reg          /* mask ASID */
-+      bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
-+      jnc     .Lnoflush_\@
-+
-+      /* Flush needed, clear the bit */
-+      btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
-+      movq    \scratch_reg2, \scratch_reg
-+      jmp     .Lwrcr3_\@
-+
-+.Lnoflush_\@:
-+      movq    \scratch_reg2, \scratch_reg
-+      SET_NOFLUSH_BIT \scratch_reg
-+
-+.Lwrcr3_\@:
-+      /* Flip the PGD and ASID to the user version */
-+      orq     $(PTI_SWITCH_MASK), \scratch_reg
-       mov     \scratch_reg, %cr3
- .Lend_\@:
- .endm
- 
-+.macro SWITCH_TO_USER_CR3_STACK       scratch_reg:req
-+      pushq   %rax
-+      SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
-+      popq    %rax
-+.endm
-+
- .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
-       ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
-       movq    %cr3, \scratch_reg
-       movq    \scratch_reg, \save_reg
-       /*
--       * Is the switch bit zero?  This means the address is
--       * up in real PAGE_TABLE_ISOLATION patches in a moment.
-+       * Is the "switch mask" all zero?  That means that both of
-+       * these are zero:
-+       *
-+       *      1. The user/kernel PCID bit, and
-+       *      2. The user/kernel "bit" that points CR3 to the
-+       *         bottom half of the 8k PGD
-+       *
-+       * That indicates a kernel CR3 value, not a user CR3.
-        */
-       testq   $(PTI_SWITCH_MASK), \scratch_reg
-       jz      .Ldone_\@
-@@ -250,7 +294,9 @@ For 32-bit we have the following conventions - kernel is built with
- 
- .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
- .endm
--.macro SWITCH_TO_USER_CR3 scratch_reg:req
-+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
-+.endm
-+.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
- .endm
- .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
- .endm
-diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
-index 791b60199aa4..fb9708d13761 100644
---- a/arch/x86/include/asm/processor-flags.h
-+++ b/arch/x86/include/asm/processor-flags.h
-@@ -36,6 +36,11 @@
- #define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull
- #define CR3_PCID_MASK 0xFFFull
- #define CR3_NOFLUSH (1UL << 63)
-+
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+# define X86_CR3_PTI_SWITCH_BIT       11
-+#endif
-+
- #else
- /*
-  * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index 3769ce182eac..2b7b32c243f1 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -9,6 +9,8 @@
- #include <asm/special_insns.h>
- #include <asm/smp.h>
- #include <asm/invpcid.h>
-+#include <asm/pti.h>
-+#include <asm/processor-flags.h>
- 
- static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
- {
-@@ -23,24 +25,54 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
- 
- /* There are 12 bits of space for ASIDS in CR3 */
- #define CR3_HW_ASID_BITS              12
-+
- /*
-  * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
-  * user/kernel switches
-  */
--#define PTI_CONSUMED_ASID_BITS                0
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+# define PTI_CONSUMED_PCID_BITS       1
-+#else
-+# define PTI_CONSUMED_PCID_BITS       0
-+#endif
-+
-+#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
- 
--#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS)
- /*
-  * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid.  -1 below to account
-  * for them being zero-based.  Another -1 is because ASID 0 is reserved for
-  * use by non-PCID-aware users.
-  */
--#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
-+#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
-+
-+/*
-+ * 6 because 6 should be plenty and struct tlb_state will fit in two cache
-+ * lines.
-+ */
-+#define TLB_NR_DYN_ASIDS      6
- 
- static inline u16 kern_pcid(u16 asid)
- {
-       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
-+
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+      /*
-+       * Make sure that the dynamic ASID space does not confict with the
-+       * bit we are using to switch between user and kernel ASIDs.
-+       */
-+      BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
-+
-       /*
-+       * The ASID being passed in here should have respected the
-+       * MAX_ASID_AVAILABLE and thus never have the switch bit set.
-+       */
-+      VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
-+#endif
-+      /*
-+       * The dynamically-assigned ASIDs that get passed in are small
-+       * (<TLB_NR_DYN_ASIDS).  They never have the high switch bit set,
-+       * so do not bother to clear it.
-+       *
-        * If PCID is on, ASID-aware code paths put the ASID+1 into the
-        * PCID bits.  This serves two purposes.  It prevents a nasty
-        * situation in which PCID-unaware code saves CR3, loads some other
-@@ -85,12 +117,6 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
-  */
- DECLARE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
- 
--/*
-- * 6 because 6 should be plenty and struct tlb_state will fit in
-- * two cache lines.
-- */
--#define TLB_NR_DYN_ASIDS 6
--
- struct tlb_context {
-       u64 ctx_id;
-       u64 tlb_gen;
-@@ -135,6 +161,13 @@ struct tlb_state {
-        */
-       bool invalidate_other;
- 
-+      /*
-+       * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
-+       * the corresponding user PCID needs a flush next time we
-+       * switch to it; see SWITCH_TO_USER_CR3.
-+       */
-+      unsigned short user_pcid_flush_mask;
-+
-       /*
-        * Access to this CR4 shadow and to H/W CR4 is protected by
-        * disabling interrupts when modifying either one.
-@@ -238,15 +271,42 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
- }
- 
- 
-+/*
-+ * Given an ASID, flush the corresponding user ASID.  We can delay this
-+ * until the next time we switch to it.
-+ *
-+ * See SWITCH_TO_USER_CR3.
-+ */
-+static inline void invalidate_user_asid(u16 asid)
-+{
-+      /* There is no user ASID if address space separation is off */
-+      if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
-+              return;
-+
-+      /*
-+       * We only have a single ASID if PCID is off and the CR3
-+       * write will have flushed it.
-+       */
-+      if (!cpu_feature_enabled(X86_FEATURE_PCID))
-+              return;
-+
-+      if (!static_cpu_has(X86_FEATURE_PTI))
-+              return;
-+
-+      __set_bit(kern_pcid(asid),
-+                (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
-+}
-+
- /*
-  * flush the entire current user mapping
-  */
- static inline void __native_flush_tlb(void)
- {
-+      invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
-       /*
--       * If current->mm == NULL then we borrow a mm which may change during a
--       * task switch and therefore we must not be preempted while we write CR3
--       * back:
-+       * If current->mm == NULL then we borrow a mm which may change
-+       * during a task switch and therefore we must not be preempted
-+       * while we write CR3 back:
-        */
-       preempt_disable();
-       native_write_cr3(__native_read_cr3());
-@@ -290,7 +350,14 @@ static inline void __native_flush_tlb_global(void)
-  */
- static inline void __native_flush_tlb_single(unsigned long addr)
- {
-+      u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
-+
-       asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
-+
-+      if (!static_cpu_has(X86_FEATURE_PTI))
-+              return;
-+
-+      invalidate_user_asid(loaded_mm_asid);
- }
- 
- /*
-diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
-index 39946d0a1d41..69077da3dbf1 100644
---- a/arch/x86/include/uapi/asm/processor-flags.h
-+++ b/arch/x86/include/uapi/asm/processor-flags.h
-@@ -77,7 +77,12 @@
- #define X86_CR3_PWT           _BITUL(X86_CR3_PWT_BIT)
- #define X86_CR3_PCD_BIT               4 /* Page Cache Disable */
- #define X86_CR3_PCD           _BITUL(X86_CR3_PCD_BIT)
--#define X86_CR3_PCID_MASK     _AC(0x00000fff,UL) /* PCID Mask */
-+
-+#define X86_CR3_PCID_BITS     12
-+#define X86_CR3_PCID_MASK     (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
-+
-+#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
-+#define X86_CR3_PCID_NOFLUSH    _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
- 
- /*
-  * Intel CPU features in CR4
-diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
-index 25b4832e9c28..87c3bafcef2c 100644
---- a/arch/x86/kernel/asm-offsets.c
-+++ b/arch/x86/kernel/asm-offsets.c
-@@ -16,6 +16,7 @@
- #include <asm/sigframe.h>
- #include <asm/bootparam.h>
- #include <asm/suspend.h>
-+#include <asm/tlbflush.h>
- 
- #ifdef CONFIG_XEN
- #include <xen/interface/xen.h>
-@@ -93,6 +94,9 @@ void common(void) {
-       BLANK();
-       DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
- 
-+      /* TLB state for the entry code */
-+      OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
-+
-       /* Layout info for cpu_entry_area */
-       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
-       OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
-diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
-index af75069fb116..caeb8a7bf0a4 100644
---- a/arch/x86/mm/init.c
-+++ b/arch/x86/mm/init.c
-@@ -855,7 +855,7 @@ void __init zone_sizes_init(void)
-       free_area_init_nodes(max_zone_pfns);
- }
- 
--DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
-+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
-       .loaded_mm = &init_mm,
-       .next_asid = 1,
-       .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index 353f2f4e1d96..06f3854d0a4f 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -106,6 +106,7 @@ static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
-       unsigned long new_mm_cr3;
- 
-       if (need_flush) {
-+              invalidate_user_asid(new_asid);
-               new_mm_cr3 = build_cr3(pgdir, new_asid);
-       } else {
-               new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 292ccc6ec48d..fb43f14ed299 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -22,7 +22,6 @@
- #include <asm/segment.h>
- #include <asm/cache.h>
- #include <asm/errno.h>
--#include "calling.h"
- #include <asm/asm-offsets.h>
- #include <asm/msr.h>
- #include <asm/unistd.h>
-@@ -39,6 +38,8 @@
- #include <asm/frame.h>
- #include <linux/err.h>
- 
-+#include "calling.h"
-+
- .code64
- .section .entry.text, "ax"
- 
-@@ -405,7 +406,7 @@ syscall_return_via_sysret:
-        * We are on the trampoline stack.  All regs except RDI are live.
-        * We can do future final exit work right here.
-        */
--      SWITCH_TO_USER_CR3 scratch_reg=%rdi
-+      SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
- 
-       popq    %rdi
-       popq    %rsp
-@@ -743,7 +744,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
-        * We can do future final exit work right here.
-        */
- 
--      SWITCH_TO_USER_CR3 scratch_reg=%rdi
-+      SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
- 
-       /* Restore RDI. */
-       popq    %rdi
-@@ -856,7 +857,7 @@ native_irq_return_ldt:
-        */
-       orq     PER_CPU_VAR(espfix_stack), %rax
- 
--      SWITCH_TO_USER_CR3 scratch_reg=%rdi     /* to user CR3 */
-+      SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
-       SWAPGS                                  /* to user GS */
-       popq    %rdi                            /* Restore user RDI */
- 
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
-index 43f856aeee67..973527e34887 100644
---- a/arch/x86/entry/entry_64_compat.S
-+++ b/arch/x86/entry/entry_64_compat.S
-@@ -274,9 +274,9 @@ sysret32_from_system_call:
-        * switch until after after the last reference to the process
-        * stack.
-        *
--       * %r8 is zeroed before the sysret, thus safe to clobber.
-+       * %r8/%r9 are zeroed before the sysret, thus safe to clobber.
-        */
--      SWITCH_TO_USER_CR3 scratch_reg=%r8
-+      SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
- 
-       xorq    %r8, %r8
-       xorq    %r9, %r9
--- 
-2.14.2
-
diff --git a/patches/kernel/0212-x86-mm-Optimize-RESTORE_CR3.patch b/patches/kernel/0212-x86-mm-Optimize-RESTORE_CR3.patch

deleted file mode 100644 (file)

index 2f04370..0000000
--- a/patches/kernel/0212-x86-mm-Optimize-RESTORE_CR3.patch
+++ /dev/null
@@ -1,127 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Mon, 4 Dec 2017 15:08:00 +0100
-Subject: [PATCH] x86/mm: Optimize RESTORE_CR3
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Most NMI/paranoid exceptions will not in fact change pagetables and would
-thus not require TLB flushing, however RESTORE_CR3 uses flushing CR3
-writes.
-
-Restores to kernel PCIDs can be NOFLUSH, because we explicitly flush the
-kernel mappings and now that we track which user PCIDs need flushing we can
-avoid those too when possible.
-
-This does mean RESTORE_CR3 needs an additional scratch_reg, luckily both
-sites have plenty available.
-
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 21e94459110252d41b45c0c8ba50fd72a664d50c)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 6ebe6e2896841282357d43c09394b0ca47c41e4a)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/calling.h  | 30 ++++++++++++++++++++++++++++--
- arch/x86/entry/entry_64.S |  4 ++--
- 2 files changed, 30 insertions(+), 4 deletions(-)
-
-diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
-index ce5fb309926d..015e0a84bb99 100644
---- a/arch/x86/entry/calling.h
-+++ b/arch/x86/entry/calling.h
-@@ -280,8 +280,34 @@ For 32-bit we have the following conventions - kernel is built with
- .Ldone_\@:
- .endm
- 
--.macro RESTORE_CR3 save_reg:req
-+.macro RESTORE_CR3 scratch_reg:req save_reg:req
-       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
-+
-+      ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
-+
-+      /*
-+       * KERNEL pages can always resume with NOFLUSH as we do
-+       * explicit flushes.
-+       */
-+      bt      $X86_CR3_PTI_SWITCH_BIT, \save_reg
-+      jnc     .Lnoflush_\@
-+
-+      /*
-+       * Check if there's a pending flush for the user ASID we're
-+       * about to set.
-+       */
-+      movq    \save_reg, \scratch_reg
-+      andq    $(0x7FF), \scratch_reg
-+      bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
-+      jnc     .Lnoflush_\@
-+
-+      btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
-+      jmp     .Lwrcr3_\@
-+
-+.Lnoflush_\@:
-+      SET_NOFLUSH_BIT \save_reg
-+
-+.Lwrcr3_\@:
-       /*
-        * The CR3 write could be avoided when not changing its value,
-        * but would require a CR3 read *and* a scratch register.
-@@ -300,7 +326,7 @@ For 32-bit we have the following conventions - kernel is built with
- .endm
- .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
- .endm
--.macro RESTORE_CR3 save_reg:req
-+.macro RESTORE_CR3 scratch_reg:req save_reg:req
- .endm
- 
- #endif
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index fb43f14ed299..b48f2c78a9bf 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -1300,7 +1300,7 @@ ENTRY(paranoid_exit)
-       testl   %ebx, %ebx                      /* swapgs needed? */
-       jnz     .Lparanoid_exit_no_swapgs
-       TRACE_IRQS_IRETQ
--      RESTORE_CR3     save_reg=%r14
-+      RESTORE_CR3     scratch_reg=%rbx save_reg=%r14
-       SWAPGS_UNSAFE_STACK
-       jmp     .Lparanoid_exit_restore
- .Lparanoid_exit_no_swapgs:
-@@ -1742,7 +1742,7 @@ end_repeat_nmi:
-       movq    $-1, %rsi
-       call    do_nmi
- 
--      RESTORE_CR3 save_reg=%r14
-+      RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
- 
-       testl   %ebx, %ebx                      /* swapgs needed? */
-       jnz     nmi_restore
--- 
-2.14.2
-
diff --git a/patches/kernel/0212-x86-mm-Use-Fix-PCID-to-optimize-user-kernel-switches.patch b/patches/kernel/0212-x86-mm-Use-Fix-PCID-to-optimize-user-kernel-switches.patch

new file mode 100644 (file)

index 0000000..5e21faf
--- /dev/null
+++ b/patches/kernel/0212-x86-mm-Use-Fix-PCID-to-optimize-user-kernel-switches.patch
@@ -0,0 +1,497 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 4 Dec 2017 15:07:59 +0100
+Subject: [PATCH] x86/mm: Use/Fix PCID to optimize user/kernel switches
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+We can use PCID to retain the TLBs across CR3 switches; including those now
+part of the user/kernel switch. This increases performance of kernel
+entry/exit at the cost of more expensive/complicated TLB flushing.
+
+Now that we have two address spaces, one for kernel and one for user space,
+we need two PCIDs per mm. We use the top PCID bit to indicate a user PCID
+(just like we use the PFN LSB for the PGD). Since we do TLB invalidation
+from kernel space, the existing code will only invalidate the kernel PCID,
+we augment that by marking the corresponding user PCID invalid, and upon
+switching back to userspace, use a flushing CR3 write for the switch.
+
+In order to access the user_pcid_flush_mask we use PER_CPU storage, which
+means the previously established SWAPGS vs CR3 ordering is now mandatory
+and required.
+
+Having to do this memory access does require additional registers. Most
+sites have a functioning stack, so we can spill one (RAX); sites without a
+functional stack must otherwise provide the second scratch register.
+
+Note: PCID is generally available on Intel Sandybridge and later CPUs.
+Note: Up until this point TLB flushing was broken in this series.
+
+Based-on-code-from: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(backported from commit 6fd166aae78c0ab738d49bda653cbd9e3b1491cf)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit ac7471365d49c0a91d4b63453eb848cc19f17589)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/calling.h                    | 72 ++++++++++++++++++-----
+ arch/x86/include/asm/processor-flags.h      |  5 ++
+ arch/x86/include/asm/tlbflush.h             | 91 +++++++++++++++++++++++++----
+ arch/x86/include/uapi/asm/processor-flags.h |  7 ++-
+ arch/x86/kernel/asm-offsets.c               |  4 ++
+ arch/x86/mm/init.c                          |  2 +-
+ arch/x86/mm/tlb.c                           |  1 +
+ arch/x86/entry/entry_64.S                   |  9 +--
+ arch/x86/entry/entry_64_compat.S            |  4 +-
+ 9 files changed, 162 insertions(+), 33 deletions(-)
+
+diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
+index bb56f5346ae8..ce5fb309926d 100644
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -2,6 +2,9 @@
+ #include <asm/unwind_hints.h>
+ #include <asm/cpufeatures.h>
+ #include <asm/page_types.h>
++#include <asm/percpu.h>
++#include <asm/asm-offsets.h>
++#include <asm/processor-flags.h>
+ 
+ /*
+ 
+@@ -190,17 +193,21 @@ For 32-bit we have the following conventions - kernel is built with
+ 
+ #ifdef CONFIG_PAGE_TABLE_ISOLATION
+ 
+-/* PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two halves: */
+-#define PTI_SWITCH_MASK (1<<PAGE_SHIFT)
++/*
++ * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
++ * halves:
++ */
++#define PTI_SWITCH_PGTABLES_MASK      (1<<PAGE_SHIFT)
++#define PTI_SWITCH_MASK               (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
+ 
+-.macro ADJUST_KERNEL_CR3 reg:req
+-      /* Clear "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
+-      andq    $(~PTI_SWITCH_MASK), \reg
++.macro SET_NOFLUSH_BIT        reg:req
++      bts     $X86_CR3_PCID_NOFLUSH_BIT, \reg
+ .endm
+ 
+-.macro ADJUST_USER_CR3 reg:req
+-      /* Move CR3 up a page to the user page tables: */
+-      orq     $(PTI_SWITCH_MASK), \reg
++.macro ADJUST_KERNEL_CR3 reg:req
++      ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
++      /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
++      andq    $(~PTI_SWITCH_MASK), \reg
+ .endm
+ 
+ .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+@@ -211,21 +218,58 @@ For 32-bit we have the following conventions - kernel is built with
+ .Lend_\@:
+ .endm
+ 
+-.macro SWITCH_TO_USER_CR3 scratch_reg:req
++#define THIS_CPU_user_pcid_flush_mask   \
++      PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
++
++.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+       mov     %cr3, \scratch_reg
+-      ADJUST_USER_CR3 \scratch_reg
++
++      ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
++
++      /*
++       * Test if the ASID needs a flush.
++       */
++      movq    \scratch_reg, \scratch_reg2
++      andq    $(0x7FF), \scratch_reg          /* mask ASID */
++      bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
++      jnc     .Lnoflush_\@
++
++      /* Flush needed, clear the bit */
++      btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
++      movq    \scratch_reg2, \scratch_reg
++      jmp     .Lwrcr3_\@
++
++.Lnoflush_\@:
++      movq    \scratch_reg2, \scratch_reg
++      SET_NOFLUSH_BIT \scratch_reg
++
++.Lwrcr3_\@:
++      /* Flip the PGD and ASID to the user version */
++      orq     $(PTI_SWITCH_MASK), \scratch_reg
+       mov     \scratch_reg, %cr3
+ .Lend_\@:
+ .endm
+ 
++.macro SWITCH_TO_USER_CR3_STACK       scratch_reg:req
++      pushq   %rax
++      SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
++      popq    %rax
++.endm
++
+ .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+       ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
+       movq    %cr3, \scratch_reg
+       movq    \scratch_reg, \save_reg
+       /*
+-       * Is the switch bit zero?  This means the address is
+-       * up in real PAGE_TABLE_ISOLATION patches in a moment.
++       * Is the "switch mask" all zero?  That means that both of
++       * these are zero:
++       *
++       *      1. The user/kernel PCID bit, and
++       *      2. The user/kernel "bit" that points CR3 to the
++       *         bottom half of the 8k PGD
++       *
++       * That indicates a kernel CR3 value, not a user CR3.
+        */
+       testq   $(PTI_SWITCH_MASK), \scratch_reg
+       jz      .Ldone_\@
+@@ -250,7 +294,9 @@ For 32-bit we have the following conventions - kernel is built with
+ 
+ .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+ .endm
+-.macro SWITCH_TO_USER_CR3 scratch_reg:req
++.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
++.endm
++.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
+ .endm
+ .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+ .endm
+diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
+index 791b60199aa4..fb9708d13761 100644
+--- a/arch/x86/include/asm/processor-flags.h
++++ b/arch/x86/include/asm/processor-flags.h
+@@ -36,6 +36,11 @@
+ #define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull
+ #define CR3_PCID_MASK 0xFFFull
+ #define CR3_NOFLUSH (1UL << 63)
++
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++# define X86_CR3_PTI_SWITCH_BIT       11
++#endif
++
+ #else
+ /*
+  * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 3769ce182eac..2b7b32c243f1 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -9,6 +9,8 @@
+ #include <asm/special_insns.h>
+ #include <asm/smp.h>
+ #include <asm/invpcid.h>
++#include <asm/pti.h>
++#include <asm/processor-flags.h>
+ 
+ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+ {
+@@ -23,24 +25,54 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+ 
+ /* There are 12 bits of space for ASIDS in CR3 */
+ #define CR3_HW_ASID_BITS              12
++
+ /*
+  * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
+  * user/kernel switches
+  */
+-#define PTI_CONSUMED_ASID_BITS                0
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++# define PTI_CONSUMED_PCID_BITS       1
++#else
++# define PTI_CONSUMED_PCID_BITS       0
++#endif
++
++#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
+ 
+-#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS)
+ /*
+  * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid.  -1 below to account
+  * for them being zero-based.  Another -1 is because ASID 0 is reserved for
+  * use by non-PCID-aware users.
+  */
+-#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
++#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
++
++/*
++ * 6 because 6 should be plenty and struct tlb_state will fit in two cache
++ * lines.
++ */
++#define TLB_NR_DYN_ASIDS      6
+ 
+ static inline u16 kern_pcid(u16 asid)
+ {
+       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
++
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++      /*
++       * Make sure that the dynamic ASID space does not conflict with the
++       * bit we are using to switch between user and kernel ASIDs.
++       */
++      BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
++
+       /*
++       * The ASID being passed in here should have respected the
++       * MAX_ASID_AVAILABLE and thus never have the switch bit set.
++       */
++      VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
++#endif
++      /*
++       * The dynamically-assigned ASIDs that get passed in are small
++       * (<TLB_NR_DYN_ASIDS).  They never have the high switch bit set,
++       * so do not bother to clear it.
++       *
+        * If PCID is on, ASID-aware code paths put the ASID+1 into the
+        * PCID bits.  This serves two purposes.  It prevents a nasty
+        * situation in which PCID-unaware code saves CR3, loads some other
+@@ -85,12 +117,6 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
+  */
+ DECLARE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
+ 
+-/*
+- * 6 because 6 should be plenty and struct tlb_state will fit in
+- * two cache lines.
+- */
+-#define TLB_NR_DYN_ASIDS 6
+-
+ struct tlb_context {
+       u64 ctx_id;
+       u64 tlb_gen;
+@@ -135,6 +161,13 @@ struct tlb_state {
+        */
+       bool invalidate_other;
+ 
++      /*
++       * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
++       * the corresponding user PCID needs a flush next time we
++       * switch to it; see SWITCH_TO_USER_CR3.
++       */
++      unsigned short user_pcid_flush_mask;
++
+       /*
+        * Access to this CR4 shadow and to H/W CR4 is protected by
+        * disabling interrupts when modifying either one.
+@@ -238,15 +271,42 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
+ }
+ 
+ 
++/*
++ * Given an ASID, flush the corresponding user ASID.  We can delay this
++ * until the next time we switch to it.
++ *
++ * See SWITCH_TO_USER_CR3.
++ */
++static inline void invalidate_user_asid(u16 asid)
++{
++      /* There is no user ASID if address space separation is off */
++      if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
++              return;
++
++      /*
++       * We only have a single ASID if PCID is off and the CR3
++       * write will have flushed it.
++       */
++      if (!cpu_feature_enabled(X86_FEATURE_PCID))
++              return;
++
++      if (!static_cpu_has(X86_FEATURE_PTI))
++              return;
++
++      __set_bit(kern_pcid(asid),
++                (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
++}
++
+ /*
+  * flush the entire current user mapping
+  */
+ static inline void __native_flush_tlb(void)
+ {
++      invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+       /*
+-       * If current->mm == NULL then we borrow a mm which may change during a
+-       * task switch and therefore we must not be preempted while we write CR3
+-       * back:
++       * If current->mm == NULL then we borrow a mm which may change
++       * during a task switch and therefore we must not be preempted
++       * while we write CR3 back:
+        */
+       preempt_disable();
+       native_write_cr3(__native_read_cr3());
+@@ -290,7 +350,14 @@ static inline void __native_flush_tlb_global(void)
+  */
+ static inline void __native_flush_tlb_single(unsigned long addr)
+ {
++      u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
++
+       asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
++
++      if (!static_cpu_has(X86_FEATURE_PTI))
++              return;
++
++      invalidate_user_asid(loaded_mm_asid);
+ }
+ 
+ /*
+diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
+index 39946d0a1d41..69077da3dbf1 100644
+--- a/arch/x86/include/uapi/asm/processor-flags.h
++++ b/arch/x86/include/uapi/asm/processor-flags.h
+@@ -77,7 +77,12 @@
+ #define X86_CR3_PWT           _BITUL(X86_CR3_PWT_BIT)
+ #define X86_CR3_PCD_BIT               4 /* Page Cache Disable */
+ #define X86_CR3_PCD           _BITUL(X86_CR3_PCD_BIT)
+-#define X86_CR3_PCID_MASK     _AC(0x00000fff,UL) /* PCID Mask */
++
++#define X86_CR3_PCID_BITS     12
++#define X86_CR3_PCID_MASK     (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
++
++#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
++#define X86_CR3_PCID_NOFLUSH    _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
+ 
+ /*
+  * Intel CPU features in CR4
+diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
+index 25b4832e9c28..87c3bafcef2c 100644
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -16,6 +16,7 @@
+ #include <asm/sigframe.h>
+ #include <asm/bootparam.h>
+ #include <asm/suspend.h>
++#include <asm/tlbflush.h>
+ 
+ #ifdef CONFIG_XEN
+ #include <xen/interface/xen.h>
+@@ -93,6 +94,9 @@ void common(void) {
+       BLANK();
+       DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+ 
++      /* TLB state for the entry code */
++      OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
++
+       /* Layout info for cpu_entry_area */
+       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+       OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index af75069fb116..caeb8a7bf0a4 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -855,7 +855,7 @@ void __init zone_sizes_init(void)
+       free_area_init_nodes(max_zone_pfns);
+ }
+ 
+-DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
++__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+       .loaded_mm = &init_mm,
+       .next_asid = 1,
+       .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 353f2f4e1d96..06f3854d0a4f 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -106,6 +106,7 @@ static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
+       unsigned long new_mm_cr3;
+ 
+       if (need_flush) {
++              invalidate_user_asid(new_asid);
+               new_mm_cr3 = build_cr3(pgdir, new_asid);
+       } else {
+               new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 292ccc6ec48d..fb43f14ed299 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -22,7 +22,6 @@
+ #include <asm/segment.h>
+ #include <asm/cache.h>
+ #include <asm/errno.h>
+-#include "calling.h"
+ #include <asm/asm-offsets.h>
+ #include <asm/msr.h>
+ #include <asm/unistd.h>
+@@ -39,6 +38,8 @@
+ #include <asm/frame.h>
+ #include <linux/err.h>
+ 
++#include "calling.h"
++
+ .code64
+ .section .entry.text, "ax"
+ 
+@@ -405,7 +406,7 @@ syscall_return_via_sysret:
+        * We are on the trampoline stack.  All regs except RDI are live.
+        * We can do future final exit work right here.
+        */
+-      SWITCH_TO_USER_CR3 scratch_reg=%rdi
++      SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+ 
+       popq    %rdi
+       popq    %rsp
+@@ -743,7 +744,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+        * We can do future final exit work right here.
+        */
+ 
+-      SWITCH_TO_USER_CR3 scratch_reg=%rdi
++      SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+ 
+       /* Restore RDI. */
+       popq    %rdi
+@@ -856,7 +857,7 @@ native_irq_return_ldt:
+        */
+       orq     PER_CPU_VAR(espfix_stack), %rax
+ 
+-      SWITCH_TO_USER_CR3 scratch_reg=%rdi     /* to user CR3 */
++      SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+       SWAPGS                                  /* to user GS */
+       popq    %rdi                            /* Restore user RDI */
+ 
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index 43f856aeee67..973527e34887 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -274,9 +274,9 @@ sysret32_from_system_call:
+        * switch until after after the last reference to the process
+        * stack.
+        *
+-       * %r8 is zeroed before the sysret, thus safe to clobber.
++       * %r8/%r9 are zeroed before the sysret, thus safe to clobber.
+        */
+-      SWITCH_TO_USER_CR3 scratch_reg=%r8
++      SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
+ 
+       xorq    %r8, %r8
+       xorq    %r9, %r9
+-- 
+2.14.2
+
diff --git a/patches/kernel/0213-x86-mm-Optimize-RESTORE_CR3.patch b/patches/kernel/0213-x86-mm-Optimize-RESTORE_CR3.patch

new file mode 100644 (file)

index 0000000..2f04370
--- /dev/null
+++ b/patches/kernel/0213-x86-mm-Optimize-RESTORE_CR3.patch
@@ -0,0 +1,127 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 4 Dec 2017 15:08:00 +0100
+Subject: [PATCH] x86/mm: Optimize RESTORE_CR3
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Most NMI/paranoid exceptions will not in fact change pagetables and would
+thus not require TLB flushing, however RESTORE_CR3 uses flushing CR3
+writes.
+
+Restores to kernel PCIDs can be NOFLUSH, because we explicitly flush the
+kernel mappings and now that we track which user PCIDs need flushing we can
+avoid those too when possible.
+
+This does mean RESTORE_CR3 needs an additional scratch_reg, luckily both
+sites have plenty available.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 21e94459110252d41b45c0c8ba50fd72a664d50c)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 6ebe6e2896841282357d43c09394b0ca47c41e4a)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/calling.h  | 30 ++++++++++++++++++++++++++++--
+ arch/x86/entry/entry_64.S |  4 ++--
+ 2 files changed, 30 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
+index ce5fb309926d..015e0a84bb99 100644
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -280,8 +280,34 @@ For 32-bit we have the following conventions - kernel is built with
+ .Ldone_\@:
+ .endm
+ 
+-.macro RESTORE_CR3 save_reg:req
++.macro RESTORE_CR3 scratch_reg:req save_reg:req
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
++
++      ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
++
++      /*
++       * KERNEL pages can always resume with NOFLUSH as we do
++       * explicit flushes.
++       */
++      bt      $X86_CR3_PTI_SWITCH_BIT, \save_reg
++      jnc     .Lnoflush_\@
++
++      /*
++       * Check if there's a pending flush for the user ASID we're
++       * about to set.
++       */
++      movq    \save_reg, \scratch_reg
++      andq    $(0x7FF), \scratch_reg
++      bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
++      jnc     .Lnoflush_\@
++
++      btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
++      jmp     .Lwrcr3_\@
++
++.Lnoflush_\@:
++      SET_NOFLUSH_BIT \save_reg
++
++.Lwrcr3_\@:
+       /*
+        * The CR3 write could be avoided when not changing its value,
+        * but would require a CR3 read *and* a scratch register.
+@@ -300,7 +326,7 @@ For 32-bit we have the following conventions - kernel is built with
+ .endm
+ .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+ .endm
+-.macro RESTORE_CR3 save_reg:req
++.macro RESTORE_CR3 scratch_reg:req save_reg:req
+ .endm
+ 
+ #endif
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index fb43f14ed299..b48f2c78a9bf 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1300,7 +1300,7 @@ ENTRY(paranoid_exit)
+       testl   %ebx, %ebx                      /* swapgs needed? */
+       jnz     .Lparanoid_exit_no_swapgs
+       TRACE_IRQS_IRETQ
+-      RESTORE_CR3     save_reg=%r14
++      RESTORE_CR3     scratch_reg=%rbx save_reg=%r14
+       SWAPGS_UNSAFE_STACK
+       jmp     .Lparanoid_exit_restore
+ .Lparanoid_exit_no_swapgs:
+@@ -1742,7 +1742,7 @@ end_repeat_nmi:
+       movq    $-1, %rsi
+       call    do_nmi
+ 
+-      RESTORE_CR3 save_reg=%r14
++      RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+ 
+       testl   %ebx, %ebx                      /* swapgs needed? */
+       jnz     nmi_restore
+-- 
+2.14.2
+
diff --git a/patches/kernel/0213-x86-mm-Use-INVPCID-for-__native_flush_tlb_single.patch b/patches/kernel/0213-x86-mm-Use-INVPCID-for-__native_flush_tlb_single.patch

deleted file mode 100644 (file)

index 61216e3..0000000
--- a/patches/kernel/0213-x86-mm-Use-INVPCID-for-__native_flush_tlb_single.patch
+++ /dev/null
@@ -1,194 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Mon, 4 Dec 2017 15:08:01 +0100
-Subject: [PATCH] x86/mm: Use INVPCID for __native_flush_tlb_single()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This uses INVPCID to shoot down individual lines of the user mapping
-instead of marking the entire user map as invalid. This
-could/might/possibly be faster.
-
-This for sure needs tlb_single_page_flush_ceiling to be redetermined;
-esp. since INVPCID is _slow_.
-
-A detailed performance analysis is available here:
-
-  https://lkml.kernel.org/r/3062e486-3539-8a1f-5724-16199420be71@intel.com
-
-[ Peterz: Split out from big combo patch ]
-
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 6cff64b86aaaa07f89f50498055a20e45754b0c1)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit e4986a4e89c0eb40f824a8505feefff3328ad4b2)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/cpufeatures.h |  1 +
- arch/x86/include/asm/tlbflush.h    | 23 +++++++++++++-
- arch/x86/mm/init.c                 | 64 ++++++++++++++++++++++----------------
- 3 files changed, 60 insertions(+), 28 deletions(-)
-
-diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
-index de4e91452de4..9b0c283afcf0 100644
---- a/arch/x86/include/asm/cpufeatures.h
-+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -196,6 +196,7 @@
- #define X86_FEATURE_CAT_L3            ( 7*32+ 4) /* Cache Allocation Technology L3 */
- #define X86_FEATURE_CAT_L2            ( 7*32+ 5) /* Cache Allocation Technology L2 */
- #define X86_FEATURE_CDP_L3            ( 7*32+ 6) /* Code and Data Prioritization L3 */
-+#define X86_FEATURE_INVPCID_SINGLE    ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
- 
- #define X86_FEATURE_HW_PSTATE         ( 7*32+ 8) /* AMD HW-PState */
- #define X86_FEATURE_PROC_FEEDBACK     ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index 2b7b32c243f1..979e590648a5 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -84,6 +84,18 @@ static inline u16 kern_pcid(u16 asid)
-       return asid + 1;
- }
- 
-+/*
-+ * The user PCID is just the kernel one, plus the "switch bit".
-+ */
-+static inline u16 user_pcid(u16 asid)
-+{
-+      u16 ret = kern_pcid(asid);
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+      ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
-+#endif
-+      return ret;
-+}
-+
- struct pgd_t;
- static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
- {
-@@ -324,6 +336,8 @@ static inline void __native_flush_tlb_global(void)
-               /*
-                * Using INVPCID is considerably faster than a pair of writes
-                * to CR4 sandwiched inside an IRQ flag save/restore.
-+               *
-+               * Note, this works with CR4.PCIDE=0 or 1.
-                */
-               invpcid_flush_all();
-               return;
-@@ -357,7 +371,14 @@ static inline void __native_flush_tlb_single(unsigned long addr)
-       if (!static_cpu_has(X86_FEATURE_PTI))
-               return;
- 
--      invalidate_user_asid(loaded_mm_asid);
-+      /*
-+       * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
-+       * Just use invalidate_user_asid() in case we are called early.
-+       */
-+      if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
-+              invalidate_user_asid(loaded_mm_asid);
-+      else
-+              invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
- }
- 
- /*
-diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
-index caeb8a7bf0a4..80259ad8c386 100644
---- a/arch/x86/mm/init.c
-+++ b/arch/x86/mm/init.c
-@@ -203,34 +203,44 @@ static void __init probe_page_size_mask(void)
- 
- static void setup_pcid(void)
- {
--#ifdef CONFIG_X86_64
--      if (boot_cpu_has(X86_FEATURE_PCID)) {
--              if (boot_cpu_has(X86_FEATURE_PGE)) {
--                      /*
--                       * This can't be cr4_set_bits_and_update_boot() --
--                       * the trampoline code can't handle CR4.PCIDE and
--                       * it wouldn't do any good anyway.  Despite the name,
--                       * cr4_set_bits_and_update_boot() doesn't actually
--                       * cause the bits in question to remain set all the
--                       * way through the secondary boot asm.
--                       *
--                       * Instead, we brute-force it and set CR4.PCIDE
--                       * manually in start_secondary().
--                       */
--                      cr4_set_bits(X86_CR4_PCIDE);
--              } else {
--                      /*
--                       * flush_tlb_all(), as currently implemented, won't
--                       * work if PCID is on but PGE is not.  Since that
--                       * combination doesn't exist on real hardware, there's
--                       * no reason to try to fully support it, but it's
--                       * polite to avoid corrupting data if we're on
--                       * an improperly configured VM.
--                       */
--                      setup_clear_cpu_cap(X86_FEATURE_PCID);
--              }
-+      if (!IS_ENABLED(CONFIG_X86_64))
-+              return;
-+
-+      if (!boot_cpu_has(X86_FEATURE_PCID))
-+              return;
-+
-+      if (boot_cpu_has(X86_FEATURE_PGE)) {
-+              /*
-+               * This can't be cr4_set_bits_and_update_boot() -- the
-+               * trampoline code can't handle CR4.PCIDE and it wouldn't
-+               * do any good anyway.  Despite the name,
-+               * cr4_set_bits_and_update_boot() doesn't actually cause
-+               * the bits in question to remain set all the way through
-+               * the secondary boot asm.
-+               *
-+               * Instead, we brute-force it and set CR4.PCIDE manually in
-+               * start_secondary().
-+               */
-+              cr4_set_bits(X86_CR4_PCIDE);
-+
-+              /*
-+               * INVPCID's single-context modes (2/3) only work if we set
-+               * X86_CR4_PCIDE, *and* we INVPCID support.  It's unusable
-+               * on systems that have X86_CR4_PCIDE clear, or that have
-+               * no INVPCID support at all.
-+               */
-+              if (boot_cpu_has(X86_FEATURE_INVPCID))
-+                      setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
-+      } else {
-+              /*
-+               * flush_tlb_all(), as currently implemented, won't work if
-+               * PCID is on but PGE is not.  Since that combination
-+               * doesn't exist on real hardware, there's no reason to try
-+               * to fully support it, but it's polite to avoid corrupting
-+               * data if we're on an improperly configured VM.
-+               */
-+              setup_clear_cpu_cap(X86_FEATURE_PCID);
-       }
--#endif
- }
- 
- #ifdef CONFIG_X86_32
--- 
-2.14.2
-
diff --git a/patches/kernel/0214-x86-mm-Clarify-the-whole-ASID-kernel-PCID-user-PCID-.patch b/patches/kernel/0214-x86-mm-Clarify-the-whole-ASID-kernel-PCID-user-PCID-.patch

deleted file mode 100644 (file)

index 1aee978..0000000
--- a/patches/kernel/0214-x86-mm-Clarify-the-whole-ASID-kernel-PCID-user-PCID-.patch
+++ /dev/null
@@ -1,142 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Tue, 5 Dec 2017 13:34:53 +0100
-Subject: [PATCH] x86/mm: Clarify the whole ASID/kernel PCID/user PCID naming
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Ideally we'd also use sparse to enforce this separation so it becomes much
-more difficult to mess up.
-
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 0a126abd576ebc6403f063dbe20cf7416c9d9393)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 2ee6efc0f708e21cfd08471132ac2255fac54553)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/tlbflush.h | 55 ++++++++++++++++++++++++++++++++---------
- 1 file changed, 43 insertions(+), 12 deletions(-)
-
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index 979e590648a5..7a04a1f1ca11 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -12,16 +12,33 @@
- #include <asm/pti.h>
- #include <asm/processor-flags.h>
- 
--static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
--{
--      /*
--       * Bump the generation count.  This also serves as a full barrier
--       * that synchronizes with switch_mm(): callers are required to order
--       * their read of mm_cpumask after their writes to the paging
--       * structures.
--       */
--      return atomic64_inc_return(&mm->context.tlb_gen);
--}
-+/*
-+ * The x86 feature is called PCID (Process Context IDentifier). It is similar
-+ * to what is traditionally called ASID on the RISC processors.
-+ *
-+ * We don't use the traditional ASID implementation, where each process/mm gets
-+ * its own ASID and flush/restart when we run out of ASID space.
-+ *
-+ * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
-+ * that came by on this CPU, allowing cheaper switch_mm between processes on
-+ * this CPU.
-+ *
-+ * We end up with different spaces for different things. To avoid confusion we
-+ * use different names for each of them:
-+ *
-+ * ASID  - [0, TLB_NR_DYN_ASIDS-1]
-+ *         the canonical identifier for an mm
-+ *
-+ * kPCID - [1, TLB_NR_DYN_ASIDS]
-+ *         the value we write into the PCID part of CR3; corresponds to the
-+ *         ASID+1, because PCID 0 is special.
-+ *
-+ * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
-+ *         for KPTI each mm has two address spaces and thus needs two
-+ *         PCID values, but we can still do with a single ASID denomination
-+ *         for each mm. Corresponds to kPCID + 2048.
-+ *
-+ */
- 
- /* There are 12 bits of space for ASIDS in CR3 */
- #define CR3_HW_ASID_BITS              12
-@@ -40,7 +57,7 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
- 
- /*
-  * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid.  -1 below to account
-- * for them being zero-based.  Another -1 is because ASID 0 is reserved for
-+ * for them being zero-based.  Another -1 is because PCID 0 is reserved for
-  * use by non-PCID-aware users.
-  */
- #define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
-@@ -51,6 +68,9 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
-  */
- #define TLB_NR_DYN_ASIDS      6
- 
-+/*
-+ * Given @asid, compute kPCID
-+ */
- static inline u16 kern_pcid(u16 asid)
- {
-       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
-@@ -85,7 +105,7 @@ static inline u16 kern_pcid(u16 asid)
- }
- 
- /*
-- * The user PCID is just the kernel one, plus the "switch bit".
-+ * Given @asid, compute uPCID
-  */
- static inline u16 user_pcid(u16 asid)
- {
-@@ -473,6 +493,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
- void native_flush_tlb_others(const struct cpumask *cpumask,
-                            const struct flush_tlb_info *info);
- 
-+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
-+{
-+      /*
-+       * Bump the generation count.  This also serves as a full barrier
-+       * that synchronizes with switch_mm(): callers are required to order
-+       * their read of mm_cpumask after their writes to the paging
-+       * structures.
-+       */
-+      return atomic64_inc_return(&mm->context.tlb_gen);
-+}
-+
- static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
-                                       struct mm_struct *mm)
- {
--- 
-2.14.2
-
diff --git a/patches/kernel/0214-x86-mm-Use-INVPCID-for-__native_flush_tlb_single.patch b/patches/kernel/0214-x86-mm-Use-INVPCID-for-__native_flush_tlb_single.patch

new file mode 100644 (file)

index 0000000..61216e3
--- /dev/null
+++ b/patches/kernel/0214-x86-mm-Use-INVPCID-for-__native_flush_tlb_single.patch
@@ -0,0 +1,194 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:08:01 +0100
+Subject: [PATCH] x86/mm: Use INVPCID for __native_flush_tlb_single()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This uses INVPCID to shoot down individual lines of the user mapping
+instead of marking the entire user map as invalid. This
+could/might/possibly be faster.
+
+This for sure needs tlb_single_page_flush_ceiling to be redetermined;
+esp. since INVPCID is _slow_.
+
+A detailed performance analysis is available here:
+
+  https://lkml.kernel.org/r/3062e486-3539-8a1f-5724-16199420be71@intel.com
+
+[ Peterz: Split out from big combo patch ]
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 6cff64b86aaaa07f89f50498055a20e45754b0c1)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit e4986a4e89c0eb40f824a8505feefff3328ad4b2)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/cpufeatures.h |  1 +
+ arch/x86/include/asm/tlbflush.h    | 23 +++++++++++++-
+ arch/x86/mm/init.c                 | 64 ++++++++++++++++++++++----------------
+ 3 files changed, 60 insertions(+), 28 deletions(-)
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index de4e91452de4..9b0c283afcf0 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -196,6 +196,7 @@
+ #define X86_FEATURE_CAT_L3            ( 7*32+ 4) /* Cache Allocation Technology L3 */
+ #define X86_FEATURE_CAT_L2            ( 7*32+ 5) /* Cache Allocation Technology L2 */
+ #define X86_FEATURE_CDP_L3            ( 7*32+ 6) /* Code and Data Prioritization L3 */
++#define X86_FEATURE_INVPCID_SINGLE    ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
+ 
+ #define X86_FEATURE_HW_PSTATE         ( 7*32+ 8) /* AMD HW-PState */
+ #define X86_FEATURE_PROC_FEEDBACK     ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 2b7b32c243f1..979e590648a5 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -84,6 +84,18 @@ static inline u16 kern_pcid(u16 asid)
+       return asid + 1;
+ }
+ 
++/*
++ * The user PCID is just the kernel one, plus the "switch bit".
++ */
++static inline u16 user_pcid(u16 asid)
++{
++      u16 ret = kern_pcid(asid);
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++      ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
++#endif
++      return ret;
++}
++
+ struct pgd_t;
+ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
+ {
+@@ -324,6 +336,8 @@ static inline void __native_flush_tlb_global(void)
+               /*
+                * Using INVPCID is considerably faster than a pair of writes
+                * to CR4 sandwiched inside an IRQ flag save/restore.
++               *
++               * Note, this works with CR4.PCIDE=0 or 1.
+                */
+               invpcid_flush_all();
+               return;
+@@ -357,7 +371,14 @@ static inline void __native_flush_tlb_single(unsigned long addr)
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return;
+ 
+-      invalidate_user_asid(loaded_mm_asid);
++      /*
++       * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
++       * Just use invalidate_user_asid() in case we are called early.
++       */
++      if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
++              invalidate_user_asid(loaded_mm_asid);
++      else
++              invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
+ }
+ 
+ /*
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index caeb8a7bf0a4..80259ad8c386 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -203,34 +203,44 @@ static void __init probe_page_size_mask(void)
+ 
+ static void setup_pcid(void)
+ {
+-#ifdef CONFIG_X86_64
+-      if (boot_cpu_has(X86_FEATURE_PCID)) {
+-              if (boot_cpu_has(X86_FEATURE_PGE)) {
+-                      /*
+-                       * This can't be cr4_set_bits_and_update_boot() --
+-                       * the trampoline code can't handle CR4.PCIDE and
+-                       * it wouldn't do any good anyway.  Despite the name,
+-                       * cr4_set_bits_and_update_boot() doesn't actually
+-                       * cause the bits in question to remain set all the
+-                       * way through the secondary boot asm.
+-                       *
+-                       * Instead, we brute-force it and set CR4.PCIDE
+-                       * manually in start_secondary().
+-                       */
+-                      cr4_set_bits(X86_CR4_PCIDE);
+-              } else {
+-                      /*
+-                       * flush_tlb_all(), as currently implemented, won't
+-                       * work if PCID is on but PGE is not.  Since that
+-                       * combination doesn't exist on real hardware, there's
+-                       * no reason to try to fully support it, but it's
+-                       * polite to avoid corrupting data if we're on
+-                       * an improperly configured VM.
+-                       */
+-                      setup_clear_cpu_cap(X86_FEATURE_PCID);
+-              }
++      if (!IS_ENABLED(CONFIG_X86_64))
++              return;
++
++      if (!boot_cpu_has(X86_FEATURE_PCID))
++              return;
++
++      if (boot_cpu_has(X86_FEATURE_PGE)) {
++              /*
++               * This can't be cr4_set_bits_and_update_boot() -- the
++               * trampoline code can't handle CR4.PCIDE and it wouldn't
++               * do any good anyway.  Despite the name,
++               * cr4_set_bits_and_update_boot() doesn't actually cause
++               * the bits in question to remain set all the way through
++               * the secondary boot asm.
++               *
++               * Instead, we brute-force it and set CR4.PCIDE manually in
++               * start_secondary().
++               */
++              cr4_set_bits(X86_CR4_PCIDE);
++
++              /*
++               * INVPCID's single-context modes (2/3) only work if we set
++               * X86_CR4_PCIDE, *and* we have INVPCID support.  It's unusable
++               * on systems that have X86_CR4_PCIDE clear, or that have
++               * no INVPCID support at all.
++               */
++              if (boot_cpu_has(X86_FEATURE_INVPCID))
++                      setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
++      } else {
++              /*
++               * flush_tlb_all(), as currently implemented, won't work if
++               * PCID is on but PGE is not.  Since that combination
++               * doesn't exist on real hardware, there's no reason to try
++               * to fully support it, but it's polite to avoid corrupting
++               * data if we're on an improperly configured VM.
++               */
++              setup_clear_cpu_cap(X86_FEATURE_PCID);
+       }
+-#endif
+ }
+ 
+ #ifdef CONFIG_X86_32
+-- 
+2.14.2
+
diff --git a/patches/kernel/0215-x86-dumpstack-Indicate-in-Oops-whether-PTI-is-config.patch b/patches/kernel/0215-x86-dumpstack-Indicate-in-Oops-whether-PTI-is-config.patch

deleted file mode 100644 (file)

index 171622c..0000000
--- a/patches/kernel/0215-x86-dumpstack-Indicate-in-Oops-whether-PTI-is-config.patch
+++ /dev/null
@@ -1,87 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vlastimil Babka <vbabka@suse.cz>
-Date: Tue, 19 Dec 2017 22:33:46 +0100
-Subject: [PATCH] x86/dumpstack: Indicate in Oops whether PTI is configured and
- enabled
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-CONFIG_PAGE_TABLE_ISOLATION is relatively new and intrusive feature that may
-still have some corner cases which could take some time to manifest and be
-fixed. It would be useful to have Oops messages indicate whether it was
-enabled for building the kernel, and whether it was disabled during boot.
-
-Example of fully enabled:
-
-       Oops: 0001 [#1] SMP PTI
-
-Example of enabled during build, but disabled during boot:
-
-       Oops: 0001 [#1] SMP NOPTI
-
-We can decide to remove this after the feature has been tested in the field
-long enough.
-
-[ tglx: Made it use boot_cpu_has() as requested by Borislav ]
-
-Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Eduardo Valentin <eduval@amazon.com>
-Acked-by: Dave Hansen <dave.hansen@intel.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Andy Lutomirsky <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: bpetkov@suse.de
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: jkosina@suse.cz
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 5f26d76c3fd67c48806415ef8b1116c97beff8ba)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 7edb91fcc96589ad6b80446ec3835f83ffabb710)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/dumpstack.c | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
-index 2bdeb983b9d8..19a936e9b259 100644
---- a/arch/x86/kernel/dumpstack.c
-+++ b/arch/x86/kernel/dumpstack.c
-@@ -298,11 +298,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
-       unsigned long sp;
- #endif
-       printk(KERN_DEFAULT
--             "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
-+             "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
-              IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT"         : "",
-              IS_ENABLED(CONFIG_SMP)     ? " SMP"             : "",
-              debug_pagealloc_enabled()  ? " DEBUG_PAGEALLOC" : "",
--             IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "");
-+             IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "",
-+             IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
-+             (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
- 
-       if (notify_die(DIE_OOPS, str, regs, err,
-                       current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
--- 
-2.14.2
-
diff --git a/patches/kernel/0215-x86-mm-Clarify-the-whole-ASID-kernel-PCID-user-PCID-.patch b/patches/kernel/0215-x86-mm-Clarify-the-whole-ASID-kernel-PCID-user-PCID-.patch

new file mode 100644 (file)

index 0000000..1aee978
--- /dev/null
+++ b/patches/kernel/0215-x86-mm-Clarify-the-whole-ASID-kernel-PCID-user-PCID-.patch
@@ -0,0 +1,142 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 5 Dec 2017 13:34:53 +0100
+Subject: [PATCH] x86/mm: Clarify the whole ASID/kernel PCID/user PCID naming
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Ideally we'd also use sparse to enforce this separation so it becomes much
+more difficult to mess up.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 0a126abd576ebc6403f063dbe20cf7416c9d9393)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 2ee6efc0f708e21cfd08471132ac2255fac54553)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/tlbflush.h | 55 ++++++++++++++++++++++++++++++++---------
+ 1 file changed, 43 insertions(+), 12 deletions(-)
+
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 979e590648a5..7a04a1f1ca11 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -12,16 +12,33 @@
+ #include <asm/pti.h>
+ #include <asm/processor-flags.h>
+ 
+-static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+-{
+-      /*
+-       * Bump the generation count.  This also serves as a full barrier
+-       * that synchronizes with switch_mm(): callers are required to order
+-       * their read of mm_cpumask after their writes to the paging
+-       * structures.
+-       */
+-      return atomic64_inc_return(&mm->context.tlb_gen);
+-}
++/*
++ * The x86 feature is called PCID (Process Context IDentifier). It is similar
++ * to what is traditionally called ASID on the RISC processors.
++ *
++ * We don't use the traditional ASID implementation, where each process/mm gets
++ * its own ASID and flush/restart when we run out of ASID space.
++ *
++ * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
++ * that came by on this CPU, allowing cheaper switch_mm between processes on
++ * this CPU.
++ *
++ * We end up with different spaces for different things. To avoid confusion we
++ * use different names for each of them:
++ *
++ * ASID  - [0, TLB_NR_DYN_ASIDS-1]
++ *         the canonical identifier for an mm
++ *
++ * kPCID - [1, TLB_NR_DYN_ASIDS]
++ *         the value we write into the PCID part of CR3; corresponds to the
++ *         ASID+1, because PCID 0 is special.
++ *
++ * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
++ *         for KPTI each mm has two address spaces and thus needs two
++ *         PCID values, but we can still do with a single ASID denomination
++ *         for each mm. Corresponds to kPCID + 2048.
++ *
++ */
+ 
+ /* There are 12 bits of space for ASIDS in CR3 */
+ #define CR3_HW_ASID_BITS              12
+@@ -40,7 +57,7 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+ 
+ /*
+  * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid.  -1 below to account
+- * for them being zero-based.  Another -1 is because ASID 0 is reserved for
++ * for them being zero-based.  Another -1 is because PCID 0 is reserved for
+  * use by non-PCID-aware users.
+  */
+ #define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
+@@ -51,6 +68,9 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+  */
+ #define TLB_NR_DYN_ASIDS      6
+ 
++/*
++ * Given @asid, compute kPCID
++ */
+ static inline u16 kern_pcid(u16 asid)
+ {
+       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+@@ -85,7 +105,7 @@ static inline u16 kern_pcid(u16 asid)
+ }
+ 
+ /*
+- * The user PCID is just the kernel one, plus the "switch bit".
++ * Given @asid, compute uPCID
+  */
+ static inline u16 user_pcid(u16 asid)
+ {
+@@ -473,6 +493,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
+ void native_flush_tlb_others(const struct cpumask *cpumask,
+                            const struct flush_tlb_info *info);
+ 
++static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
++{
++      /*
++       * Bump the generation count.  This also serves as a full barrier
++       * that synchronizes with switch_mm(): callers are required to order
++       * their read of mm_cpumask after their writes to the paging
++       * structures.
++       */
++      return atomic64_inc_return(&mm->context.tlb_gen);
++}
++
+ static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
+                                       struct mm_struct *mm)
+ {
+-- 
+2.14.2
+
diff --git a/patches/kernel/0216-x86-dumpstack-Indicate-in-Oops-whether-PTI-is-config.patch b/patches/kernel/0216-x86-dumpstack-Indicate-in-Oops-whether-PTI-is-config.patch

new file mode 100644 (file)

index 0000000..171622c
--- /dev/null
+++ b/patches/kernel/0216-x86-dumpstack-Indicate-in-Oops-whether-PTI-is-config.patch
@@ -0,0 +1,87 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Tue, 19 Dec 2017 22:33:46 +0100
+Subject: [PATCH] x86/dumpstack: Indicate in Oops whether PTI is configured and
+ enabled
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+CONFIG_PAGE_TABLE_ISOLATION is relatively new and intrusive feature that may
+still have some corner cases which could take some time to manifest and be
+fixed. It would be useful to have Oops messages indicate whether it was
+enabled for building the kernel, and whether it was disabled during boot.
+
+Example of fully enabled:
+
+       Oops: 0001 [#1] SMP PTI
+
+Example of enabled during build, but disabled during boot:
+
+       Oops: 0001 [#1] SMP NOPTI
+
+We can decide to remove this after the feature has been tested in the field
+long enough.
+
+[ tglx: Made it use boot_cpu_has() as requested by Borislav ]
+
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Eduardo Valentin <eduval@amazon.com>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Andy Lutomirsky <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: bpetkov@suse.de
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: jkosina@suse.cz
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 5f26d76c3fd67c48806415ef8b1116c97beff8ba)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 7edb91fcc96589ad6b80446ec3835f83ffabb710)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/dumpstack.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
+index 2bdeb983b9d8..19a936e9b259 100644
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -298,11 +298,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
+       unsigned long sp;
+ #endif
+       printk(KERN_DEFAULT
+-             "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
++             "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
+              IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT"         : "",
+              IS_ENABLED(CONFIG_SMP)     ? " SMP"             : "",
+              debug_pagealloc_enabled()  ? " DEBUG_PAGEALLOC" : "",
+-             IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "");
++             IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "",
++             IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
++             (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
+ 
+       if (notify_die(DIE_OOPS, str, regs, err,
+                       current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0216-x86-mm-pti-Add-Kconfig.patch b/patches/kernel/0216-x86-mm-pti-Add-Kconfig.patch

deleted file mode 100644 (file)

index 07ebcee..0000000
--- a/patches/kernel/0216-x86-mm-pti-Add-Kconfig.patch
+++ /dev/null
@@ -1,81 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Mon, 4 Dec 2017 15:08:03 +0100
-Subject: [PATCH] x86/mm/pti: Add Kconfig
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Finally allow CONFIG_PAGE_TABLE_ISOLATION to be enabled.
-
-PARAVIRT generally requires that the kernel not manage its own page tables.
-It also means that the hypervisor and kernel must agree wholeheartedly
-about what format the page tables are in and what they contain.
-PAGE_TABLE_ISOLATION, unfortunately, changes the rules and they
-can not be used together.
-
-I've seen conflicting feedback from maintainers lately about whether they
-want the Kconfig magic to go first or last in a patch series.  It's going
-last here because the partially-applied series leads to kernels that can
-not boot in a bunch of cases.  I did a run through the entire series with
-CONFIG_PAGE_TABLE_ISOLATION=y to look for build errors, though.
-
-[ tglx: Removed SMP and !PARAVIRT dependencies as they not longer exist ]
-
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 385ce0ea4c078517fa51c261882c4e72fba53005)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit ce12963b837e809f6ae048587d9377a298c1094d)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- security/Kconfig | 10 ++++++++++
- 1 file changed, 10 insertions(+)
-
-diff --git a/security/Kconfig b/security/Kconfig
-index 305b496ff6a3..91cb8f611a0d 100644
---- a/security/Kconfig
-+++ b/security/Kconfig
-@@ -96,6 +96,16 @@ config SECURITY_NETWORK
-         implement socket and networking access controls.
-         If you are unsure how to answer this question, answer N.
- 
-+config PAGE_TABLE_ISOLATION
-+      bool "Remove the kernel mapping in user mode"
-+      depends on X86_64 && !UML
-+      help
-+        This feature reduces the number of hardware side channels by
-+        ensuring that the majority of kernel addresses are not mapped
-+        into userspace.
-+
-+        See Documentation/x86/pagetable-isolation.txt for more details.
-+
- config SECURITY_INFINIBAND
-       bool "Infiniband Security Hooks"
-       depends on SECURITY && INFINIBAND
--- 
-2.14.2
-
diff --git a/patches/kernel/0217-x86-mm-dump_pagetables-Add-page-table-directory-to-t.patch b/patches/kernel/0217-x86-mm-dump_pagetables-Add-page-table-directory-to-t.patch

deleted file mode 100644 (file)

index a80eab0..0000000
--- a/patches/kernel/0217-x86-mm-dump_pagetables-Add-page-table-directory-to-t.patch
+++ /dev/null
@@ -1,87 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Borislav Petkov <bp@suse.de>
-Date: Mon, 4 Dec 2017 15:08:04 +0100
-Subject: [PATCH] x86/mm/dump_pagetables: Add page table directory to the
- debugfs VFS hierarchy
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The upcoming support for dumping the kernel and the user space page tables
-of the current process would create more random files in the top level
-debugfs directory.
-
-Add a page table directory and move the existing file to it.
-
-Signed-off-by: Borislav Petkov <bp@suse.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 75298aa179d56cd64f54e58a19fffc8ab922b4c0)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit ae5c4af9bbefed4adc12075c28fb5889547c99cc)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/debug_pagetables.c | 15 ++++++++++-----
- 1 file changed, 10 insertions(+), 5 deletions(-)
-
-diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
-index bfcffdf6c577..d1449fb6dc7a 100644
---- a/arch/x86/mm/debug_pagetables.c
-+++ b/arch/x86/mm/debug_pagetables.c
-@@ -22,21 +22,26 @@ static const struct file_operations ptdump_fops = {
-       .release        = single_release,
- };
- 
--static struct dentry *pe;
-+static struct dentry *dir, *pe;
- 
- static int __init pt_dump_debug_init(void)
- {
--      pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
--                               &ptdump_fops);
--      if (!pe)
-+      dir = debugfs_create_dir("page_tables", NULL);
-+      if (!dir)
-               return -ENOMEM;
- 
-+      pe = debugfs_create_file("kernel", 0400, dir, NULL, &ptdump_fops);
-+      if (!pe)
-+              goto err;
-       return 0;
-+err:
-+      debugfs_remove_recursive(dir);
-+      return -ENOMEM;
- }
- 
- static void __exit pt_dump_debug_exit(void)
- {
--      debugfs_remove_recursive(pe);
-+      debugfs_remove_recursive(dir);
- }
- 
- module_init(pt_dump_debug_init);
--- 
-2.14.2
-
diff --git a/patches/kernel/0217-x86-mm-pti-Add-Kconfig.patch b/patches/kernel/0217-x86-mm-pti-Add-Kconfig.patch

new file mode 100644 (file)

index 0000000..07ebcee
--- /dev/null
+++ b/patches/kernel/0217-x86-mm-pti-Add-Kconfig.patch
@@ -0,0 +1,81 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Mon, 4 Dec 2017 15:08:03 +0100
+Subject: [PATCH] x86/mm/pti: Add Kconfig
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Finally allow CONFIG_PAGE_TABLE_ISOLATION to be enabled.
+
+PARAVIRT generally requires that the kernel not manage its own page tables.
+It also means that the hypervisor and kernel must agree wholeheartedly
+about what format the page tables are in and what they contain.
+PAGE_TABLE_ISOLATION, unfortunately, changes the rules and they
+can not be used together.
+
+I've seen conflicting feedback from maintainers lately about whether they
+want the Kconfig magic to go first or last in a patch series.  It's going
+last here because the partially-applied series leads to kernels that can
+not boot in a bunch of cases.  I did a run through the entire series with
+CONFIG_PAGE_TABLE_ISOLATION=y to look for build errors, though.
+
+[ tglx: Removed SMP and !PARAVIRT dependencies as they not longer exist ]
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 385ce0ea4c078517fa51c261882c4e72fba53005)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit ce12963b837e809f6ae048587d9377a298c1094d)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ security/Kconfig | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/security/Kconfig b/security/Kconfig
+index 305b496ff6a3..91cb8f611a0d 100644
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -96,6 +96,16 @@ config SECURITY_NETWORK
+         implement socket and networking access controls.
+         If you are unsure how to answer this question, answer N.
+ 
++config PAGE_TABLE_ISOLATION
++      bool "Remove the kernel mapping in user mode"
++      depends on X86_64 && !UML
++      help
++        This feature reduces the number of hardware side channels by
++        ensuring that the majority of kernel addresses are not mapped
++        into userspace.
++
++        See Documentation/x86/pagetable-isolation.txt for more details.
++
+ config SECURITY_INFINIBAND
+       bool "Infiniband Security Hooks"
+       depends on SECURITY && INFINIBAND
+-- 
+2.14.2
+
diff --git a/patches/kernel/0218-x86-mm-dump_pagetables-Add-page-table-directory-to-t.patch b/patches/kernel/0218-x86-mm-dump_pagetables-Add-page-table-directory-to-t.patch

new file mode 100644 (file)

index 0000000..a80eab0
--- /dev/null
+++ b/patches/kernel/0218-x86-mm-dump_pagetables-Add-page-table-directory-to-t.patch
@@ -0,0 +1,87 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Mon, 4 Dec 2017 15:08:04 +0100
+Subject: [PATCH] x86/mm/dump_pagetables: Add page table directory to the
+ debugfs VFS hierarchy
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The upcoming support for dumping the kernel and the user space page tables
+of the current process would create more random files in the top level
+debugfs directory.
+
+Add a page table directory and move the existing file to it.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 75298aa179d56cd64f54e58a19fffc8ab922b4c0)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit ae5c4af9bbefed4adc12075c28fb5889547c99cc)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/debug_pagetables.c | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
+index bfcffdf6c577..d1449fb6dc7a 100644
+--- a/arch/x86/mm/debug_pagetables.c
++++ b/arch/x86/mm/debug_pagetables.c
+@@ -22,21 +22,26 @@ static const struct file_operations ptdump_fops = {
+       .release        = single_release,
+ };
+ 
+-static struct dentry *pe;
++static struct dentry *dir, *pe;
+ 
+ static int __init pt_dump_debug_init(void)
+ {
+-      pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
+-                               &ptdump_fops);
+-      if (!pe)
++      dir = debugfs_create_dir("page_tables", NULL);
++      if (!dir)
+               return -ENOMEM;
+ 
++      pe = debugfs_create_file("kernel", 0400, dir, NULL, &ptdump_fops);
++      if (!pe)
++              goto err;
+       return 0;
++err:
++      debugfs_remove_recursive(dir);
++      return -ENOMEM;
+ }
+ 
+ static void __exit pt_dump_debug_exit(void)
+ {
+-      debugfs_remove_recursive(pe);
++      debugfs_remove_recursive(dir);
+ }
+ 
+ module_init(pt_dump_debug_init);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0218-x86-mm-dump_pagetables-Check-user-space-page-table-f.patch b/patches/kernel/0218-x86-mm-dump_pagetables-Check-user-space-page-table-f.patch

deleted file mode 100644 (file)

index 8152b66..0000000
--- a/patches/kernel/0218-x86-mm-dump_pagetables-Check-user-space-page-table-f.patch
+++ /dev/null
@@ -1,139 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon, 4 Dec 2017 15:08:05 +0100
-Subject: [PATCH] x86/mm/dump_pagetables: Check user space page table for WX
- pages
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-ptdump_walk_pgd_level_checkwx() checks the kernel page table for WX pages,
-but does not check the PAGE_TABLE_ISOLATION user space page table.
-
-Restructure the code so that dmesg output is selected by an explicit
-argument and not implicit via checking the pgd argument for !NULL.
-
-Add the check for the user space page table.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit b4bf4f924b1d7bade38fd51b2e401d20d0956e4d)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 1adfe82e8fe5afa2fae59efe498c461d5a52cb6c)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/pgtable.h |  1 +
- arch/x86/mm/debug_pagetables.c |  2 +-
- arch/x86/mm/dump_pagetables.c  | 30 +++++++++++++++++++++++++-----
- 3 files changed, 27 insertions(+), 6 deletions(-)
-
-diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
-index 25604b8a251a..4f5eb81cf8be 100644
---- a/arch/x86/include/asm/pgtable.h
-+++ b/arch/x86/include/asm/pgtable.h
-@@ -17,6 +17,7 @@
- #include <asm/x86_init.h>
- 
- void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
-+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd);
- void ptdump_walk_pgd_level_checkwx(void);
- 
- #ifdef CONFIG_DEBUG_WX
-diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
-index d1449fb6dc7a..8e70c1599e51 100644
---- a/arch/x86/mm/debug_pagetables.c
-+++ b/arch/x86/mm/debug_pagetables.c
-@@ -5,7 +5,7 @@
- 
- static int ptdump_show(struct seq_file *m, void *v)
- {
--      ptdump_walk_pgd_level(m, NULL);
-+      ptdump_walk_pgd_level_debugfs(m, NULL);
-       return 0;
- }
- 
-diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
-index eed93dd4cb4a..7b022ad37c4e 100644
---- a/arch/x86/mm/dump_pagetables.c
-+++ b/arch/x86/mm/dump_pagetables.c
-@@ -457,7 +457,7 @@ static inline bool is_hypervisor_range(int idx)
- }
- 
- static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
--                                     bool checkwx)
-+                                     bool checkwx, bool dmesg)
- {
- #ifdef CONFIG_X86_64
-       pgd_t *start = (pgd_t *) &init_top_pgt;
-@@ -470,7 +470,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
- 
-       if (pgd) {
-               start = pgd;
--              st.to_dmesg = true;
-+              st.to_dmesg = dmesg;
-       }
- 
-       st.check_wx = checkwx;
-@@ -508,13 +508,33 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
- 
- void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
- {
--      ptdump_walk_pgd_level_core(m, pgd, false);
-+      ptdump_walk_pgd_level_core(m, pgd, false, true);
-+}
-+
-+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd)
-+{
-+      ptdump_walk_pgd_level_core(m, pgd, false, false);
-+}
-+EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
-+
-+static void ptdump_walk_user_pgd_level_checkwx(void)
-+{
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+      pgd_t *pgd = (pgd_t *) &init_top_pgt;
-+
-+      if (!static_cpu_has(X86_FEATURE_PTI))
-+              return;
-+
-+      pr_info("x86/mm: Checking user space page tables\n");
-+      pgd = kernel_to_user_pgdp(pgd);
-+      ptdump_walk_pgd_level_core(NULL, pgd, true, false);
-+#endif
- }
--EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
- 
- void ptdump_walk_pgd_level_checkwx(void)
- {
--      ptdump_walk_pgd_level_core(NULL, NULL, true);
-+      ptdump_walk_pgd_level_core(NULL, NULL, true, false);
-+      ptdump_walk_user_pgd_level_checkwx();
- }
- 
- static int __init pt_dump_init(void)
--- 
-2.14.2
-
diff --git a/patches/kernel/0219-x86-mm-dump_pagetables-Allow-dumping-current-pagetab.patch b/patches/kernel/0219-x86-mm-dump_pagetables-Allow-dumping-current-pagetab.patch

deleted file mode 100644 (file)

index 00aa1ce..0000000
--- a/patches/kernel/0219-x86-mm-dump_pagetables-Allow-dumping-current-pagetab.patch
+++ /dev/null
@@ -1,188 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon, 4 Dec 2017 15:08:06 +0100
-Subject: [PATCH] x86/mm/dump_pagetables: Allow dumping current pagetables
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Add two debugfs files which allow to dump the pagetable of the current
-task.
-
-current_kernel dumps the regular page table. This is the page table which
-is normally shared between kernel and user space. If kernel page table
-isolation is enabled this is the kernel space mapping.
-
-If kernel page table isolation is enabled the second file, current_user,
-dumps the user space page table.
-
-These files allow to verify the resulting page tables for page table
-isolation, but even in the normal case its useful to be able to inspect
-user space page tables of current for debugging purposes.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: David Laight <David.Laight@aculab.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: Eduardo Valentin <eduval@amazon.com>
-Cc: Greg KH <gregkh@linuxfoundation.org>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Will Deacon <will.deacon@arm.com>
-Cc: aliguori@amazon.com
-Cc: daniel.gruss@iaik.tugraz.at
-Cc: hughd@google.com
-Cc: keescook@google.com
-Cc: linux-mm@kvack.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit a4b51ef6552c704764684cef7e753162dc87c5fa)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit e31e0526cb47bd1d848fc3fdb10d2aeb909e46b5)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/pgtable.h |  2 +-
- arch/x86/mm/debug_pagetables.c | 71 +++++++++++++++++++++++++++++++++++++++---
- arch/x86/mm/dump_pagetables.c  |  6 +++-
- 3 files changed, 73 insertions(+), 6 deletions(-)
-
-diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
-index 4f5eb81cf8be..1f9e7fea3c06 100644
---- a/arch/x86/include/asm/pgtable.h
-+++ b/arch/x86/include/asm/pgtable.h
-@@ -17,7 +17,7 @@
- #include <asm/x86_init.h>
- 
- void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
--void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd);
-+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
- void ptdump_walk_pgd_level_checkwx(void);
- 
- #ifdef CONFIG_DEBUG_WX
-diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
-index 8e70c1599e51..421f2664ffa0 100644
---- a/arch/x86/mm/debug_pagetables.c
-+++ b/arch/x86/mm/debug_pagetables.c
-@@ -5,7 +5,7 @@
- 
- static int ptdump_show(struct seq_file *m, void *v)
- {
--      ptdump_walk_pgd_level_debugfs(m, NULL);
-+      ptdump_walk_pgd_level_debugfs(m, NULL, false);
-       return 0;
- }
- 
-@@ -22,7 +22,57 @@ static const struct file_operations ptdump_fops = {
-       .release        = single_release,
- };
- 
--static struct dentry *dir, *pe;
-+static int ptdump_show_curknl(struct seq_file *m, void *v)
-+{
-+      if (current->mm->pgd) {
-+              down_read(&current->mm->mmap_sem);
-+              ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false);
-+              up_read(&current->mm->mmap_sem);
-+      }
-+      return 0;
-+}
-+
-+static int ptdump_open_curknl(struct inode *inode, struct file *filp)
-+{
-+      return single_open(filp, ptdump_show_curknl, NULL);
-+}
-+
-+static const struct file_operations ptdump_curknl_fops = {
-+      .owner          = THIS_MODULE,
-+      .open           = ptdump_open_curknl,
-+      .read           = seq_read,
-+      .llseek         = seq_lseek,
-+      .release        = single_release,
-+};
-+
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+static struct dentry *pe_curusr;
-+
-+static int ptdump_show_curusr(struct seq_file *m, void *v)
-+{
-+      if (current->mm->pgd) {
-+              down_read(&current->mm->mmap_sem);
-+              ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true);
-+              up_read(&current->mm->mmap_sem);
-+      }
-+      return 0;
-+}
-+
-+static int ptdump_open_curusr(struct inode *inode, struct file *filp)
-+{
-+      return single_open(filp, ptdump_show_curusr, NULL);
-+}
-+
-+static const struct file_operations ptdump_curusr_fops = {
-+      .owner          = THIS_MODULE,
-+      .open           = ptdump_open_curusr,
-+      .read           = seq_read,
-+      .llseek         = seq_lseek,
-+      .release        = single_release,
-+};
-+#endif
-+
-+static struct dentry *dir, *pe_knl, *pe_curknl;
- 
- static int __init pt_dump_debug_init(void)
- {
-@@ -30,9 +80,22 @@ static int __init pt_dump_debug_init(void)
-       if (!dir)
-               return -ENOMEM;
- 
--      pe = debugfs_create_file("kernel", 0400, dir, NULL, &ptdump_fops);
--      if (!pe)
-+      pe_knl = debugfs_create_file("kernel", 0400, dir, NULL,
-+                                   &ptdump_fops);
-+      if (!pe_knl)
-+              goto err;
-+
-+      pe_curknl = debugfs_create_file("current_kernel", 0400,
-+                                      dir, NULL, &ptdump_curknl_fops);
-+      if (!pe_curknl)
-+              goto err;
-+
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+      pe_curusr = debugfs_create_file("current_user", 0400,
-+                                      dir, NULL, &ptdump_curusr_fops);
-+      if (!pe_curusr)
-               goto err;
-+#endif
-       return 0;
- err:
-       debugfs_remove_recursive(dir);
-diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
-index 7b022ad37c4e..12b93d350480 100644
---- a/arch/x86/mm/dump_pagetables.c
-+++ b/arch/x86/mm/dump_pagetables.c
-@@ -511,8 +511,12 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
-       ptdump_walk_pgd_level_core(m, pgd, false, true);
- }
- 
--void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd)
-+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
- {
-+#ifdef CONFIG_PAGE_TABLE_ISOLATION
-+      if (user && static_cpu_has(X86_FEATURE_PTI))
-+              pgd = kernel_to_user_pgdp(pgd);
-+#endif
-       ptdump_walk_pgd_level_core(m, pgd, false, false);
- }
- EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
--- 
-2.14.2
-
diff --git a/patches/kernel/0219-x86-mm-dump_pagetables-Check-user-space-page-table-f.patch b/patches/kernel/0219-x86-mm-dump_pagetables-Check-user-space-page-table-f.patch

new file mode 100644 (file)

index 0000000..8152b66
--- /dev/null
+++ b/patches/kernel/0219-x86-mm-dump_pagetables-Check-user-space-page-table-f.patch
@@ -0,0 +1,139 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:08:05 +0100
+Subject: [PATCH] x86/mm/dump_pagetables: Check user space page table for WX
+ pages
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+ptdump_walk_pgd_level_checkwx() checks the kernel page table for WX pages,
+but does not check the PAGE_TABLE_ISOLATION user space page table.
+
+Restructure the code so that dmesg output is selected by an explicit
+argument and not implicit via checking the pgd argument for !NULL.
+
+Add the check for the user space page table.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit b4bf4f924b1d7bade38fd51b2e401d20d0956e4d)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 1adfe82e8fe5afa2fae59efe498c461d5a52cb6c)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/pgtable.h |  1 +
+ arch/x86/mm/debug_pagetables.c |  2 +-
+ arch/x86/mm/dump_pagetables.c  | 30 +++++++++++++++++++++++++-----
+ 3 files changed, 27 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
+index 25604b8a251a..4f5eb81cf8be 100644
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -17,6 +17,7 @@
+ #include <asm/x86_init.h>
+ 
+ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
++void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd);
+ void ptdump_walk_pgd_level_checkwx(void);
+ 
+ #ifdef CONFIG_DEBUG_WX
+diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
+index d1449fb6dc7a..8e70c1599e51 100644
+--- a/arch/x86/mm/debug_pagetables.c
++++ b/arch/x86/mm/debug_pagetables.c
+@@ -5,7 +5,7 @@
+ 
+ static int ptdump_show(struct seq_file *m, void *v)
+ {
+-      ptdump_walk_pgd_level(m, NULL);
++      ptdump_walk_pgd_level_debugfs(m, NULL);
+       return 0;
+ }
+ 
+diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
+index eed93dd4cb4a..7b022ad37c4e 100644
+--- a/arch/x86/mm/dump_pagetables.c
++++ b/arch/x86/mm/dump_pagetables.c
+@@ -457,7 +457,7 @@ static inline bool is_hypervisor_range(int idx)
+ }
+ 
+ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
+-                                     bool checkwx)
++                                     bool checkwx, bool dmesg)
+ {
+ #ifdef CONFIG_X86_64
+       pgd_t *start = (pgd_t *) &init_top_pgt;
+@@ -470,7 +470,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
+ 
+       if (pgd) {
+               start = pgd;
+-              st.to_dmesg = true;
++              st.to_dmesg = dmesg;
+       }
+ 
+       st.check_wx = checkwx;
+@@ -508,13 +508,33 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
+ 
+ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
+ {
+-      ptdump_walk_pgd_level_core(m, pgd, false);
++      ptdump_walk_pgd_level_core(m, pgd, false, true);
++}
++
++void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd)
++{
++      ptdump_walk_pgd_level_core(m, pgd, false, false);
++}
++EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
++
++static void ptdump_walk_user_pgd_level_checkwx(void)
++{
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++      pgd_t *pgd = (pgd_t *) &init_top_pgt;
++
++      if (!static_cpu_has(X86_FEATURE_PTI))
++              return;
++
++      pr_info("x86/mm: Checking user space page tables\n");
++      pgd = kernel_to_user_pgdp(pgd);
++      ptdump_walk_pgd_level_core(NULL, pgd, true, false);
++#endif
+ }
+-EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
+ 
+ void ptdump_walk_pgd_level_checkwx(void)
+ {
+-      ptdump_walk_pgd_level_core(NULL, NULL, true);
++      ptdump_walk_pgd_level_core(NULL, NULL, true, false);
++      ptdump_walk_user_pgd_level_checkwx();
+ }
+ 
+ static int __init pt_dump_init(void)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0220-x86-ldt-Make-the-LDT-mapping-RO.patch b/patches/kernel/0220-x86-ldt-Make-the-LDT-mapping-RO.patch

deleted file mode 100644 (file)

index 4e42731..0000000
--- a/patches/kernel/0220-x86-ldt-Make-the-LDT-mapping-RO.patch
+++ /dev/null
@@ -1,123 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Fri, 15 Dec 2017 20:35:11 +0100
-Subject: [PATCH] x86/ldt: Make the LDT mapping RO
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Now that the LDT mapping is in a known area when PAGE_TABLE_ISOLATION is
-enabled its a primary target for attacks, if a user space interface fails
-to validate a write address correctly. That can never happen, right?
-
-The SDM states:
-
-    If the segment descriptors in the GDT or an LDT are placed in ROM, the
-    processor can enter an indefinite loop if software or the processor
-    attempts to update (write to) the ROM-based segment descriptors. To
-    prevent this problem, set the accessed bits for all segment descriptors
-    placed in a ROM. Also, remove operating-system or executive code that
-    attempts to modify segment descriptors located in ROM.
-
-So its a valid approach to set the ACCESS bit when setting up the LDT entry
-and to map the table RO. Fixup the selftest so it can handle that new mode.
-
-Remove the manual ACCESS bit setter in set_tls_desc() as this is now
-pointless. Folded the patch from Peter Ziljstra.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Juergen Gross <jgross@suse.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 9f5cb6b32d9e0a3a7453222baaf15664d92adbf2)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit f4b13d6f67b3a89d878094901a9ca834b39415c1)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/desc.h           |  2 ++
- arch/x86/kernel/ldt.c                 |  7 ++++++-
- arch/x86/kernel/tls.c                 | 11 ++---------
- tools/testing/selftests/x86/ldt_gdt.c |  3 +--
- 4 files changed, 11 insertions(+), 12 deletions(-)
-
-diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
-index de40c514ba25..c765bc294a9d 100644
---- a/arch/x86/include/asm/desc.h
-+++ b/arch/x86/include/asm/desc.h
-@@ -20,6 +20,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
- 
-       desc->type              = (info->read_exec_only ^ 1) << 1;
-       desc->type             |= info->contents << 2;
-+      /* Set the ACCESS bit so it can be mapped RO */
-+      desc->type             |= 1;
- 
-       desc->s                 = 1;
-       desc->dpl               = 0x3;
-diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
-index eceaada581ff..2260eb6e2de7 100644
---- a/arch/x86/kernel/ldt.c
-+++ b/arch/x86/kernel/ldt.c
-@@ -157,7 +157,12 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
-               ptep = get_locked_pte(mm, va, &ptl);
-               if (!ptep)
-                       return -ENOMEM;
--              pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL & ~_PAGE_GLOBAL));
-+              /*
-+               * Map it RO so the easy to find address is not a primary
-+               * target via some kernel interface which misses a
-+               * permission check.
-+               */
-+              pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
-               set_pte_at(mm, va, ptep, pte);
-               pte_unmap_unlock(ptep, ptl);
-       }
-diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
-index a106b9719c58..41880a2421ea 100644
---- a/arch/x86/kernel/tls.c
-+++ b/arch/x86/kernel/tls.c
-@@ -92,17 +92,10 @@ static void set_tls_desc(struct task_struct *p, int idx,
-       cpu = get_cpu();
- 
-       while (n-- > 0) {
--              if (LDT_empty(info) || LDT_zero(info)) {
-+              if (LDT_empty(info) || LDT_zero(info))
-                       memset(desc, 0, sizeof(*desc));
--              } else {
-+              else
-                       fill_ldt(desc, info);
--
--                      /*
--                       * Always set the accessed bit so that the CPU
--                       * doesn't try to write to the (read-only) GDT.
--                       */
--                      desc->type |= 1;
--              }
-               ++info;
-               ++desc;
-       }
-diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
-index 783e1a754b78..bbd1d0e4d683 100644
---- a/tools/testing/selftests/x86/ldt_gdt.c
-+++ b/tools/testing/selftests/x86/ldt_gdt.c
-@@ -121,8 +121,7 @@ static void check_valid_segment(uint16_t index, int ldt,
-        * NB: Different Linux versions do different things with the
-        * accessed bit in set_thread_area().
-        */
--      if (ar != expected_ar &&
--          (ldt || ar != (expected_ar | AR_ACCESSED))) {
-+      if (ar != expected_ar && ar != (expected_ar | AR_ACCESSED)) {
-               printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
-                      (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
-               nerrs++;
--- 
-2.14.2
-
diff --git a/patches/kernel/0220-x86-mm-dump_pagetables-Allow-dumping-current-pagetab.patch b/patches/kernel/0220-x86-mm-dump_pagetables-Allow-dumping-current-pagetab.patch

new file mode 100644 (file)

index 0000000..00aa1ce
--- /dev/null
+++ b/patches/kernel/0220-x86-mm-dump_pagetables-Allow-dumping-current-pagetab.patch
@@ -0,0 +1,188 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:08:06 +0100
+Subject: [PATCH] x86/mm/dump_pagetables: Allow dumping current pagetables
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Add two debugfs files which allow dumping the page table of the current
+task.
+
+current_kernel dumps the regular page table. This is the page table which
+is normally shared between kernel and user space. If kernel page table
+isolation is enabled this is the kernel space mapping.
+
+If kernel page table isolation is enabled the second file, current_user,
+dumps the user space page table.
+
+These files allow verifying the resulting page tables for page table
+isolation, but even in the normal case it's useful to be able to inspect
+the user space page tables of the current task for debugging purposes.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit a4b51ef6552c704764684cef7e753162dc87c5fa)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit e31e0526cb47bd1d848fc3fdb10d2aeb909e46b5)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/pgtable.h |  2 +-
+ arch/x86/mm/debug_pagetables.c | 71 +++++++++++++++++++++++++++++++++++++++---
+ arch/x86/mm/dump_pagetables.c  |  6 +++-
+ 3 files changed, 73 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
+index 4f5eb81cf8be..1f9e7fea3c06 100644
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -17,7 +17,7 @@
+ #include <asm/x86_init.h>
+ 
+ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+-void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd);
++void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
+ void ptdump_walk_pgd_level_checkwx(void);
+ 
+ #ifdef CONFIG_DEBUG_WX
+diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
+index 8e70c1599e51..421f2664ffa0 100644
+--- a/arch/x86/mm/debug_pagetables.c
++++ b/arch/x86/mm/debug_pagetables.c
+@@ -5,7 +5,7 @@
+ 
+ static int ptdump_show(struct seq_file *m, void *v)
+ {
+-      ptdump_walk_pgd_level_debugfs(m, NULL);
++      ptdump_walk_pgd_level_debugfs(m, NULL, false);
+       return 0;
+ }
+ 
+@@ -22,7 +22,57 @@ static const struct file_operations ptdump_fops = {
+       .release        = single_release,
+ };
+ 
+-static struct dentry *dir, *pe;
++static int ptdump_show_curknl(struct seq_file *m, void *v)
++{
++      if (current->mm->pgd) {
++              down_read(&current->mm->mmap_sem);
++              ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false);
++              up_read(&current->mm->mmap_sem);
++      }
++      return 0;
++}
++
++static int ptdump_open_curknl(struct inode *inode, struct file *filp)
++{
++      return single_open(filp, ptdump_show_curknl, NULL);
++}
++
++static const struct file_operations ptdump_curknl_fops = {
++      .owner          = THIS_MODULE,
++      .open           = ptdump_open_curknl,
++      .read           = seq_read,
++      .llseek         = seq_lseek,
++      .release        = single_release,
++};
++
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++static struct dentry *pe_curusr;
++
++static int ptdump_show_curusr(struct seq_file *m, void *v)
++{
++      if (current->mm->pgd) {
++              down_read(&current->mm->mmap_sem);
++              ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true);
++              up_read(&current->mm->mmap_sem);
++      }
++      return 0;
++}
++
++static int ptdump_open_curusr(struct inode *inode, struct file *filp)
++{
++      return single_open(filp, ptdump_show_curusr, NULL);
++}
++
++static const struct file_operations ptdump_curusr_fops = {
++      .owner          = THIS_MODULE,
++      .open           = ptdump_open_curusr,
++      .read           = seq_read,
++      .llseek         = seq_lseek,
++      .release        = single_release,
++};
++#endif
++
++static struct dentry *dir, *pe_knl, *pe_curknl;
+ 
+ static int __init pt_dump_debug_init(void)
+ {
+@@ -30,9 +80,22 @@ static int __init pt_dump_debug_init(void)
+       if (!dir)
+               return -ENOMEM;
+ 
+-      pe = debugfs_create_file("kernel", 0400, dir, NULL, &ptdump_fops);
+-      if (!pe)
++      pe_knl = debugfs_create_file("kernel", 0400, dir, NULL,
++                                   &ptdump_fops);
++      if (!pe_knl)
++              goto err;
++
++      pe_curknl = debugfs_create_file("current_kernel", 0400,
++                                      dir, NULL, &ptdump_curknl_fops);
++      if (!pe_curknl)
++              goto err;
++
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++      pe_curusr = debugfs_create_file("current_user", 0400,
++                                      dir, NULL, &ptdump_curusr_fops);
++      if (!pe_curusr)
+               goto err;
++#endif
+       return 0;
+ err:
+       debugfs_remove_recursive(dir);
+diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
+index 7b022ad37c4e..12b93d350480 100644
+--- a/arch/x86/mm/dump_pagetables.c
++++ b/arch/x86/mm/dump_pagetables.c
+@@ -511,8 +511,12 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
+       ptdump_walk_pgd_level_core(m, pgd, false, true);
+ }
+ 
+-void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd)
++void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
+ {
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++      if (user && static_cpu_has(X86_FEATURE_PTI))
++              pgd = kernel_to_user_pgdp(pgd);
++#endif
+       ptdump_walk_pgd_level_core(m, pgd, false, false);
+ }
+ EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0221-x86-ldt-Make-the-LDT-mapping-RO.patch b/patches/kernel/0221-x86-ldt-Make-the-LDT-mapping-RO.patch

new file mode 100644 (file)

index 0000000..4e42731
--- /dev/null
+++ b/patches/kernel/0221-x86-ldt-Make-the-LDT-mapping-RO.patch
@@ -0,0 +1,123 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 15 Dec 2017 20:35:11 +0100
+Subject: [PATCH] x86/ldt: Make the LDT mapping RO
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Now that the LDT mapping is in a known area when PAGE_TABLE_ISOLATION is
+enabled, it's a primary target for attacks if a user space interface fails
+to validate a write address correctly. That can never happen, right?
+
+The SDM states:
+
+    If the segment descriptors in the GDT or an LDT are placed in ROM, the
+    processor can enter an indefinite loop if software or the processor
+    attempts to update (write to) the ROM-based segment descriptors. To
+    prevent this problem, set the accessed bits for all segment descriptors
+    placed in a ROM. Also, remove operating-system or executive code that
+    attempts to modify segment descriptors located in ROM.
+
+So it's a valid approach to set the ACCESS bit when setting up the LDT entry
+and to map the table RO. Fix up the selftest so it can handle that new mode.
+
+Remove the manual ACCESS bit setter in set_tls_desc() as this is now
+pointless. Folded the patch from Peter Zijlstra.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 9f5cb6b32d9e0a3a7453222baaf15664d92adbf2)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit f4b13d6f67b3a89d878094901a9ca834b39415c1)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/desc.h           |  2 ++
+ arch/x86/kernel/ldt.c                 |  7 ++++++-
+ arch/x86/kernel/tls.c                 | 11 ++---------
+ tools/testing/selftests/x86/ldt_gdt.c |  3 +--
+ 4 files changed, 11 insertions(+), 12 deletions(-)
+
+diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
+index de40c514ba25..c765bc294a9d 100644
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -20,6 +20,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
+ 
+       desc->type              = (info->read_exec_only ^ 1) << 1;
+       desc->type             |= info->contents << 2;
++      /* Set the ACCESS bit so it can be mapped RO */
++      desc->type             |= 1;
+ 
+       desc->s                 = 1;
+       desc->dpl               = 0x3;
+diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
+index eceaada581ff..2260eb6e2de7 100644
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -157,7 +157,12 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
+               ptep = get_locked_pte(mm, va, &ptl);
+               if (!ptep)
+                       return -ENOMEM;
+-              pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL & ~_PAGE_GLOBAL));
++              /*
++               * Map it RO so the easy to find address is not a primary
++               * target via some kernel interface which misses a
++               * permission check.
++               */
++              pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
+               set_pte_at(mm, va, ptep, pte);
+               pte_unmap_unlock(ptep, ptl);
+       }
+diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
+index a106b9719c58..41880a2421ea 100644
+--- a/arch/x86/kernel/tls.c
++++ b/arch/x86/kernel/tls.c
+@@ -92,17 +92,10 @@ static void set_tls_desc(struct task_struct *p, int idx,
+       cpu = get_cpu();
+ 
+       while (n-- > 0) {
+-              if (LDT_empty(info) || LDT_zero(info)) {
++              if (LDT_empty(info) || LDT_zero(info))
+                       memset(desc, 0, sizeof(*desc));
+-              } else {
++              else
+                       fill_ldt(desc, info);
+-
+-                      /*
+-                       * Always set the accessed bit so that the CPU
+-                       * doesn't try to write to the (read-only) GDT.
+-                       */
+-                      desc->type |= 1;
+-              }
+               ++info;
+               ++desc;
+       }
+diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
+index 783e1a754b78..bbd1d0e4d683 100644
+--- a/tools/testing/selftests/x86/ldt_gdt.c
++++ b/tools/testing/selftests/x86/ldt_gdt.c
+@@ -121,8 +121,7 @@ static void check_valid_segment(uint16_t index, int ldt,
+        * NB: Different Linux versions do different things with the
+        * accessed bit in set_thread_area().
+        */
+-      if (ar != expected_ar &&
+-          (ldt || ar != (expected_ar | AR_ACCESSED))) {
++      if (ar != expected_ar && ar != (expected_ar | AR_ACCESSED)) {
+               printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
+                      (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
+               nerrs++;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0221-x86-smpboot-Remove-stale-TLB-flush-invocations.patch b/patches/kernel/0221-x86-smpboot-Remove-stale-TLB-flush-invocations.patch

deleted file mode 100644 (file)

index b5c62d0..0000000
--- a/patches/kernel/0221-x86-smpboot-Remove-stale-TLB-flush-invocations.patch
+++ /dev/null
@@ -1,74 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Sat, 30 Dec 2017 22:13:53 +0100
-Subject: [PATCH] x86/smpboot: Remove stale TLB flush invocations
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-smpboot_setup_warm_reset_vector() and smpboot_restore_warm_reset_vector()
-invoke local_flush_tlb() for no obvious reason.
-
-Digging in history revealed that the original code in the 2.1 era added
-those because the code manipulated a swapper_pg_dir pagetable entry. The
-pagetable manipulation was removed long ago in the 2.3 timeframe, but the
-TLB flush invocations stayed around forever.
-
-Remove them along with the pointless pr_debug()s which come from the same 2.1
-change.
-
-Reported-by: Dominik Brodowski <linux@dominikbrodowski.net>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: <stable@vger.kernel.org>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Linus Torvalds <torvalds@linuxfoundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Link: http://lkml.kernel.org/r/20171230211829.586548655@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 322f8b8b340c824aef891342b0f5795d15e11562)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit fb08c4a80a22dc79c9775f493e291dfe2c642b86)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/smpboot.c | 9 ---------
- 1 file changed, 9 deletions(-)
-
-diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
-index 03d2ba2da3b0..6ad8391b9866 100644
---- a/arch/x86/kernel/smpboot.c
-+++ b/arch/x86/kernel/smpboot.c
-@@ -128,25 +128,16 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
-       spin_lock_irqsave(&rtc_lock, flags);
-       CMOS_WRITE(0xa, 0xf);
-       spin_unlock_irqrestore(&rtc_lock, flags);
--      local_flush_tlb();
--      pr_debug("1.\n");
-       *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
-                                                       start_eip >> 4;
--      pr_debug("2.\n");
-       *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
-                                                       start_eip & 0xf;
--      pr_debug("3.\n");
- }
- 
- static inline void smpboot_restore_warm_reset_vector(void)
- {
-       unsigned long flags;
- 
--      /*
--       * Install writable page 0 entry to set BIOS data area.
--       */
--      local_flush_tlb();
--
-       /*
-        * Paranoid:  Set warm reset code and vector here back
-        * to default values.
--- 
-2.14.2
-
diff --git a/patches/kernel/0222-x86-mm-Remove-preempt_disable-enable-from-__native_f.patch b/patches/kernel/0222-x86-mm-Remove-preempt_disable-enable-from-__native_f.patch

deleted file mode 100644 (file)

index 934ff9b..0000000
--- a/patches/kernel/0222-x86-mm-Remove-preempt_disable-enable-from-__native_f.patch
+++ /dev/null
@@ -1,89 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Sat, 30 Dec 2017 22:13:54 +0100
-Subject: [PATCH] x86/mm: Remove preempt_disable/enable() from
- __native_flush_tlb()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The preempt_disable/enable() pair in __native_flush_tlb() was added in
-commit:
-
-  5cf0791da5c1 ("x86/mm: Disable preemption during CR3 read+write")
-
-... to protect the UP variant of flush_tlb_mm_range().
-
-That preempt_disable/enable() pair should have been added to the UP variant
-of flush_tlb_mm_range() instead.
-
-The UP variant was removed with commit:
-
-  ce4a4e565f52 ("x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code")
-
-... but the preempt_disable/enable() pair stayed around.
-
-The latest change to __native_flush_tlb() in commit:
-
-  6fd166aae78c ("x86/mm: Use/Fix PCID to optimize user/kernel switches")
-
-... added an access to a per CPU variable outside the preempt disabled
-regions, which makes no sense at all. __native_flush_tlb() must always
-be called with at least preemption disabled.
-
-Remove the preempt_disable/enable() pair and add a WARN_ON_ONCE() to catch
-bad callers independent of the smp_processor_id() debugging.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: <stable@vger.kernel.org>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: Dominik Brodowski <linux@dominikbrodowski.net>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Linus Torvalds <torvalds@linuxfoundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Link: http://lkml.kernel.org/r/20171230211829.679325424@linutronix.de
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit decab0888e6e14e11d53cefa85f8b3d3b45ce73c)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit cfcf931c425b60d0092bcb4a4deb1f5d5db0e293)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/tlbflush.h | 14 ++++++++------
- 1 file changed, 8 insertions(+), 6 deletions(-)
-
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index 7a04a1f1ca11..ff6a6d668c32 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -334,15 +334,17 @@ static inline void invalidate_user_asid(u16 asid)
-  */
- static inline void __native_flush_tlb(void)
- {
--      invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
-       /*
--       * If current->mm == NULL then we borrow a mm which may change
--       * during a task switch and therefore we must not be preempted
--       * while we write CR3 back:
-+       * Preemption or interrupts must be disabled to protect the access
-+       * to the per CPU variable and to prevent being preempted between
-+       * read_cr3() and write_cr3().
-        */
--      preempt_disable();
-+      WARN_ON_ONCE(preemptible());
-+
-+      invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
-+
-+      /* If current->mm == NULL then the read_cr3() "borrows" an mm */
-       native_write_cr3(__native_read_cr3());
--      preempt_enable();
- }
- 
- /*
--- 
-2.14.2
-
diff --git a/patches/kernel/0222-x86-smpboot-Remove-stale-TLB-flush-invocations.patch b/patches/kernel/0222-x86-smpboot-Remove-stale-TLB-flush-invocations.patch

new file mode 100644 (file)

index 0000000..b5c62d0
--- /dev/null
+++ b/patches/kernel/0222-x86-smpboot-Remove-stale-TLB-flush-invocations.patch
@@ -0,0 +1,74 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 30 Dec 2017 22:13:53 +0100
+Subject: [PATCH] x86/smpboot: Remove stale TLB flush invocations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+smpboot_setup_warm_reset_vector() and smpboot_restore_warm_reset_vector()
+invoke local_flush_tlb() for no obvious reason.
+
+Digging in history revealed that the original code in the 2.1 era added
+those because the code manipulated a swapper_pg_dir pagetable entry. The
+pagetable manipulation was removed long ago in the 2.3 timeframe, but the
+TLB flush invocations stayed around forever.
+
+Remove them along with the pointless pr_debug()s which come from the same 2.1
+change.
+
+Reported-by: Dominik Brodowski <linux@dominikbrodowski.net>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: <stable@vger.kernel.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Linus Torvalds <torvalds@linuxfoundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/20171230211829.586548655@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 322f8b8b340c824aef891342b0f5795d15e11562)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit fb08c4a80a22dc79c9775f493e291dfe2c642b86)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/smpboot.c | 9 ---------
+ 1 file changed, 9 deletions(-)
+
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index 03d2ba2da3b0..6ad8391b9866 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -128,25 +128,16 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
+       spin_lock_irqsave(&rtc_lock, flags);
+       CMOS_WRITE(0xa, 0xf);
+       spin_unlock_irqrestore(&rtc_lock, flags);
+-      local_flush_tlb();
+-      pr_debug("1.\n");
+       *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
+                                                       start_eip >> 4;
+-      pr_debug("2.\n");
+       *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
+                                                       start_eip & 0xf;
+-      pr_debug("3.\n");
+ }
+ 
+ static inline void smpboot_restore_warm_reset_vector(void)
+ {
+       unsigned long flags;
+ 
+-      /*
+-       * Install writable page 0 entry to set BIOS data area.
+-       */
+-      local_flush_tlb();
+-
+       /*
+        * Paranoid:  Set warm reset code and vector here back
+        * to default values.
+-- 
+2.14.2
+
diff --git a/patches/kernel/0223-x86-ldt-Plug-memory-leak-in-error-path.patch b/patches/kernel/0223-x86-ldt-Plug-memory-leak-in-error-path.patch

deleted file mode 100644 (file)

index ef3580a..0000000
--- a/patches/kernel/0223-x86-ldt-Plug-memory-leak-in-error-path.patch
+++ /dev/null
@@ -1,63 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Sun, 31 Dec 2017 11:24:34 +0100
-Subject: [PATCH] x86/ldt: Plug memory leak in error path
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The error path in write_ldt() tries to free 'old_ldt' instead of the newly
-allocated 'new_ldt', resulting in a memory leak. It also misses to clean up a
-half populated LDT pagetable, which is not a leak as it gets cleaned up
-when the process exits.
-
-Free both the potentially half populated LDT pagetable and the newly
-allocated LDT struct. This can be done unconditionally because once an LDT
-is mapped subsequent maps will succeed, because the PTE page is already
-populated and the two LDTs fit into that single page.
-
-Reported-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: Dominik Brodowski <linux@dominikbrodowski.net>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Linus Torvalds <torvalds@linuxfoundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Fixes: f55f0501cbf6 ("x86/pti: Put the LDT in its own PGD if PTI is on")
-Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1712311121340.1899@nanos
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit a62d69857aab4caa43049e72fe0ed5c4a60518dd)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 03d02494f6253d0bdca7254d85e50786448c14f9)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/ldt.c | 8 +++++++-
- 1 file changed, 7 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
-index 2260eb6e2de7..9a35b7e541bc 100644
---- a/arch/x86/kernel/ldt.c
-+++ b/arch/x86/kernel/ldt.c
-@@ -420,7 +420,13 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
-        */
-       error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
-       if (error) {
--              free_ldt_struct(old_ldt);
-+              /*
-+               * This only can fail for the first LDT setup. If an LDT is
-+               * already installed then the PTE page is already
-+               * populated. Mop up a half populated page table.
-+               */
-+              free_ldt_pgtables(mm);
-+              free_ldt_struct(new_ldt);
-               goto out_unlock;
-       }
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0223-x86-mm-Remove-preempt_disable-enable-from-__native_f.patch b/patches/kernel/0223-x86-mm-Remove-preempt_disable-enable-from-__native_f.patch

new file mode 100644 (file)

index 0000000..934ff9b
--- /dev/null
+++ b/patches/kernel/0223-x86-mm-Remove-preempt_disable-enable-from-__native_f.patch
@@ -0,0 +1,89 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 30 Dec 2017 22:13:54 +0100
+Subject: [PATCH] x86/mm: Remove preempt_disable/enable() from
+ __native_flush_tlb()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The preempt_disable/enable() pair in __native_flush_tlb() was added in
+commit:
+
+  5cf0791da5c1 ("x86/mm: Disable preemption during CR3 read+write")
+
+... to protect the UP variant of flush_tlb_mm_range().
+
+That preempt_disable/enable() pair should have been added to the UP variant
+of flush_tlb_mm_range() instead.
+
+The UP variant was removed with commit:
+
+  ce4a4e565f52 ("x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code")
+
+... but the preempt_disable/enable() pair stayed around.
+
+The latest change to __native_flush_tlb() in commit:
+
+  6fd166aae78c ("x86/mm: Use/Fix PCID to optimize user/kernel switches")
+
+... added an access to a per CPU variable outside the preempt disabled
+regions, which makes no sense at all. __native_flush_tlb() must always
+be called with at least preemption disabled.
+
+Remove the preempt_disable/enable() pair and add a WARN_ON_ONCE() to catch
+bad callers independent of the smp_processor_id() debugging.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: <stable@vger.kernel.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Dominik Brodowski <linux@dominikbrodowski.net>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Linus Torvalds <torvalds@linuxfoundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/20171230211829.679325424@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit decab0888e6e14e11d53cefa85f8b3d3b45ce73c)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit cfcf931c425b60d0092bcb4a4deb1f5d5db0e293)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/tlbflush.h | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 7a04a1f1ca11..ff6a6d668c32 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -334,15 +334,17 @@ static inline void invalidate_user_asid(u16 asid)
+  */
+ static inline void __native_flush_tlb(void)
+ {
+-      invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+       /*
+-       * If current->mm == NULL then we borrow a mm which may change
+-       * during a task switch and therefore we must not be preempted
+-       * while we write CR3 back:
++       * Preemption or interrupts must be disabled to protect the access
++       * to the per CPU variable and to prevent being preempted between
++       * read_cr3() and write_cr3().
+        */
+-      preempt_disable();
++      WARN_ON_ONCE(preemptible());
++
++      invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
++
++      /* If current->mm == NULL then the read_cr3() "borrows" an mm */
+       native_write_cr3(__native_read_cr3());
+-      preempt_enable();
+ }
+ 
+ /*
+-- 
+2.14.2
+
diff --git a/patches/kernel/0224-x86-ldt-Make-LDT-pgtable-free-conditional.patch b/patches/kernel/0224-x86-ldt-Make-LDT-pgtable-free-conditional.patch

deleted file mode 100644 (file)

index 5d177cd..0000000
--- a/patches/kernel/0224-x86-ldt-Make-LDT-pgtable-free-conditional.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Sun, 31 Dec 2017 16:52:15 +0100
-Subject: [PATCH] x86/ldt: Make LDT pgtable free conditional
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Andy prefers to be paranoid about the pagetable free in the error path of
-write_ldt(). Make it conditional and warn whenever the installment of a
-secondary LDT fails.
-
-Requested-by: Andy Lutomirski <luto@amacapital.net>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-(cherry picked from commit 7f414195b0c3612acd12b4611a5fe75995cf10c7)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 4e23d9d8427c9b2bd10176bd56dfcaca5e0d6b0f)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/ldt.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
-index 9a35b7e541bc..51af781fac85 100644
---- a/arch/x86/kernel/ldt.c
-+++ b/arch/x86/kernel/ldt.c
-@@ -425,7 +425,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
-                * already installed then the PTE page is already
-                * populated. Mop up a half populated page table.
-                */
--              free_ldt_pgtables(mm);
-+              if (!WARN_ON_ONCE(old_ldt))
-+                      free_ldt_pgtables(mm);
-               free_ldt_struct(new_ldt);
-               goto out_unlock;
-       }
--- 
-2.14.2
-
diff --git a/patches/kernel/0224-x86-ldt-Plug-memory-leak-in-error-path.patch b/patches/kernel/0224-x86-ldt-Plug-memory-leak-in-error-path.patch

new file mode 100644 (file)

index 0000000..ef3580a
--- /dev/null
+++ b/patches/kernel/0224-x86-ldt-Plug-memory-leak-in-error-path.patch
@@ -0,0 +1,63 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 31 Dec 2017 11:24:34 +0100
+Subject: [PATCH] x86/ldt: Plug memory leak in error path
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The error path in write_ldt() tries to free 'old_ldt' instead of the newly
+allocated 'new_ldt', resulting in a memory leak. It also misses to clean up a
+half populated LDT pagetable, which is not a leak as it gets cleaned up
+when the process exits.
+
+Free both the potentially half populated LDT pagetable and the newly
+allocated LDT struct. This can be done unconditionally because once an LDT
+is mapped subsequent maps will succeed, because the PTE page is already
+populated and the two LDTs fit into that single page.
+
+Reported-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Dominik Brodowski <linux@dominikbrodowski.net>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Linus Torvalds <torvalds@linuxfoundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Fixes: f55f0501cbf6 ("x86/pti: Put the LDT in its own PGD if PTI is on")
+Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1712311121340.1899@nanos
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit a62d69857aab4caa43049e72fe0ed5c4a60518dd)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 03d02494f6253d0bdca7254d85e50786448c14f9)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/ldt.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
+index 2260eb6e2de7..9a35b7e541bc 100644
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -420,7 +420,13 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
+        */
+       error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
+       if (error) {
+-              free_ldt_struct(old_ldt);
++              /*
++               * This only can fail for the first LDT setup. If an LDT is
++               * already installed then the PTE page is already
++               * populated. Mop up a half populated page table.
++               */
++              free_ldt_pgtables(mm);
++              free_ldt_struct(new_ldt);
+               goto out_unlock;
+       }
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0225-UBUNTU-Config-updateconfigs-to-enable-PTI.patch b/patches/kernel/0225-UBUNTU-Config-updateconfigs-to-enable-PTI.patch

deleted file mode 100644 (file)

index 66452e5..0000000
--- a/patches/kernel/0225-UBUNTU-Config-updateconfigs-to-enable-PTI.patch
+++ /dev/null
@@ -1,77 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Whitcroft <apw@canonical.com>
-Date: Mon, 18 Dec 2017 12:09:25 +0000
-Subject: [PATCH] UBUNTU: [Config] updateconfigs to enable PTI
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 239497489e75fe18b55f568a43c76fd65a7cbf4f)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- debian.master/config/amd64/config.common.amd64 | 1 +
- debian.master/config/config.common.ubuntu      | 5 ++++-
- debian.master/config/i386/config.common.i386   | 1 +
- 3 files changed, 6 insertions(+), 1 deletion(-)
-
-diff --git a/debian.master/config/amd64/config.common.amd64 b/debian.master/config/amd64/config.common.amd64
-index 6df8bcf72690..6412a1366160 100644
---- a/debian.master/config/amd64/config.common.amd64
-+++ b/debian.master/config/amd64/config.common.amd64
-@@ -471,6 +471,7 @@ CONFIG_UIO_PRUSS=m
- CONFIG_UIO_SERCOS3=m
- CONFIG_ULTRIX_PARTITION=y
- CONFIG_UNIXWARE_DISKLABEL=y
-+# CONFIG_UNWINDER_FRAME_POINTER is not set
- CONFIG_USB_DWC2_PCI=m
- CONFIG_USB_EHCI_HCD_PLATFORM=y
- CONFIG_USB_GADGET=m
-diff --git a/debian.master/config/config.common.ubuntu b/debian.master/config/config.common.ubuntu
-index 37a14874f7f9..ebb00db16844 100644
---- a/debian.master/config/config.common.ubuntu
-+++ b/debian.master/config/config.common.ubuntu
-@@ -6201,6 +6201,7 @@ CONFIG_PADATA=y
- CONFIG_PAGE_COUNTER=y
- # CONFIG_PAGE_OWNER is not set
- # CONFIG_PAGE_POISONING is not set
-+CONFIG_PAGE_TABLE_ISOLATION=y
- CONFIG_PALMAS_GPADC=m
- CONFIG_PANASONIC_LAPTOP=m
- CONFIG_PANEL=m
-@@ -8659,7 +8660,7 @@ CONFIG_STACKTRACE=y
- CONFIG_STACKTRACE_SUPPORT=y
- CONFIG_STACK_GUARD=256
- CONFIG_STACK_TRACER=y
--# CONFIG_STACK_VALIDATION is not set
-+CONFIG_STACK_VALIDATION=y
- # CONFIG_STAGING_BOARD is not set
- CONFIG_STAGING_MEDIA=y
- # CONFIG_STATIC_KEYS_SELFTEST is not set
-@@ -9173,6 +9174,8 @@ CONFIG_UNIX=y
- CONFIG_UNIX98_PTYS=y
- CONFIG_UNIX_DIAG=m
- CONFIG_UNUSED_SYMBOLS=y
-+# CONFIG_UNWINDER_GUESS is not set
-+CONFIG_UNWINDER_ORC=y
- CONFIG_UPROBES=y
- CONFIG_UPROBE_EVENTS=y
- CONFIG_US5182D=m
-diff --git a/debian.master/config/i386/config.common.i386 b/debian.master/config/i386/config.common.i386
-index eb973e0eb199..4b8d6a14e31c 100644
---- a/debian.master/config/i386/config.common.i386
-+++ b/debian.master/config/i386/config.common.i386
-@@ -463,6 +463,7 @@ CONFIG_UIO_PRUSS=m
- CONFIG_UIO_SERCOS3=m
- CONFIG_ULTRIX_PARTITION=y
- CONFIG_UNIXWARE_DISKLABEL=y
-+CONFIG_UNWINDER_FRAME_POINTER=y
- CONFIG_USB_DWC2_PCI=m
- CONFIG_USB_EHCI_HCD_PLATFORM=y
- CONFIG_USB_GADGET=m
--- 
-2.14.2
-
diff --git a/patches/kernel/0225-x86-ldt-Make-LDT-pgtable-free-conditional.patch b/patches/kernel/0225-x86-ldt-Make-LDT-pgtable-free-conditional.patch

new file mode 100644 (file)

index 0000000..5d177cd
--- /dev/null
+++ b/patches/kernel/0225-x86-ldt-Make-LDT-pgtable-free-conditional.patch
@@ -0,0 +1,42 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 31 Dec 2017 16:52:15 +0100
+Subject: [PATCH] x86/ldt: Make LDT pgtable free conditional
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Andy prefers to be paranoid about the pagetable free in the error path of
+write_ldt(). Make it conditional and warn whenever the installment of a
+secondary LDT fails.
+
+Requested-by: Andy Lutomirski <luto@amacapital.net>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+(cherry picked from commit 7f414195b0c3612acd12b4611a5fe75995cf10c7)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 4e23d9d8427c9b2bd10176bd56dfcaca5e0d6b0f)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/ldt.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
+index 9a35b7e541bc..51af781fac85 100644
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -425,7 +425,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
+                * already installed then the PTE page is already
+                * populated. Mop up a half populated page table.
+                */
+-              free_ldt_pgtables(mm);
++              if (!WARN_ON_ONCE(old_ldt))
++                      free_ldt_pgtables(mm);
+               free_ldt_struct(new_ldt);
+               goto out_unlock;
+       }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0226-UBUNTU-Config-updateconfigs-to-enable-PTI.patch b/patches/kernel/0226-UBUNTU-Config-updateconfigs-to-enable-PTI.patch

new file mode 100644 (file)

index 0000000..66452e5
--- /dev/null
+++ b/patches/kernel/0226-UBUNTU-Config-updateconfigs-to-enable-PTI.patch
@@ -0,0 +1,77 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Whitcroft <apw@canonical.com>
+Date: Mon, 18 Dec 2017 12:09:25 +0000
+Subject: [PATCH] UBUNTU: [Config] updateconfigs to enable PTI
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 239497489e75fe18b55f568a43c76fd65a7cbf4f)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ debian.master/config/amd64/config.common.amd64 | 1 +
+ debian.master/config/config.common.ubuntu      | 5 ++++-
+ debian.master/config/i386/config.common.i386   | 1 +
+ 3 files changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/debian.master/config/amd64/config.common.amd64 b/debian.master/config/amd64/config.common.amd64
+index 6df8bcf72690..6412a1366160 100644
+--- a/debian.master/config/amd64/config.common.amd64
++++ b/debian.master/config/amd64/config.common.amd64
+@@ -471,6 +471,7 @@ CONFIG_UIO_PRUSS=m
+ CONFIG_UIO_SERCOS3=m
+ CONFIG_ULTRIX_PARTITION=y
+ CONFIG_UNIXWARE_DISKLABEL=y
++# CONFIG_UNWINDER_FRAME_POINTER is not set
+ CONFIG_USB_DWC2_PCI=m
+ CONFIG_USB_EHCI_HCD_PLATFORM=y
+ CONFIG_USB_GADGET=m
+diff --git a/debian.master/config/config.common.ubuntu b/debian.master/config/config.common.ubuntu
+index 37a14874f7f9..ebb00db16844 100644
+--- a/debian.master/config/config.common.ubuntu
++++ b/debian.master/config/config.common.ubuntu
+@@ -6201,6 +6201,7 @@ CONFIG_PADATA=y
+ CONFIG_PAGE_COUNTER=y
+ # CONFIG_PAGE_OWNER is not set
+ # CONFIG_PAGE_POISONING is not set
++CONFIG_PAGE_TABLE_ISOLATION=y
+ CONFIG_PALMAS_GPADC=m
+ CONFIG_PANASONIC_LAPTOP=m
+ CONFIG_PANEL=m
+@@ -8659,7 +8660,7 @@ CONFIG_STACKTRACE=y
+ CONFIG_STACKTRACE_SUPPORT=y
+ CONFIG_STACK_GUARD=256
+ CONFIG_STACK_TRACER=y
+-# CONFIG_STACK_VALIDATION is not set
++CONFIG_STACK_VALIDATION=y
+ # CONFIG_STAGING_BOARD is not set
+ CONFIG_STAGING_MEDIA=y
+ # CONFIG_STATIC_KEYS_SELFTEST is not set
+@@ -9173,6 +9174,8 @@ CONFIG_UNIX=y
+ CONFIG_UNIX98_PTYS=y
+ CONFIG_UNIX_DIAG=m
+ CONFIG_UNUSED_SYMBOLS=y
++# CONFIG_UNWINDER_GUESS is not set
++CONFIG_UNWINDER_ORC=y
+ CONFIG_UPROBES=y
+ CONFIG_UPROBE_EVENTS=y
+ CONFIG_US5182D=m
+diff --git a/debian.master/config/i386/config.common.i386 b/debian.master/config/i386/config.common.i386
+index eb973e0eb199..4b8d6a14e31c 100644
+--- a/debian.master/config/i386/config.common.i386
++++ b/debian.master/config/i386/config.common.i386
+@@ -463,6 +463,7 @@ CONFIG_UIO_PRUSS=m
+ CONFIG_UIO_SERCOS3=m
+ CONFIG_ULTRIX_PARTITION=y
+ CONFIG_UNIXWARE_DISKLABEL=y
++CONFIG_UNWINDER_FRAME_POINTER=y
+ CONFIG_USB_DWC2_PCI=m
+ CONFIG_USB_EHCI_HCD_PLATFORM=y
+ CONFIG_USB_GADGET=m
+-- 
+2.14.2
+
diff --git a/patches/kernel/0226-kvm-x86-fix-RSM-when-PCID-is-non-zero.patch b/patches/kernel/0226-kvm-x86-fix-RSM-when-PCID-is-non-zero.patch

deleted file mode 100644 (file)

index e83e451..0000000
--- a/patches/kernel/0226-kvm-x86-fix-RSM-when-PCID-is-non-zero.patch
+++ /dev/null
@@ -1,124 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Paolo Bonzini <pbonzini@redhat.com>
-Date: Thu, 21 Dec 2017 00:49:14 +0100
-Subject: [PATCH] kvm: x86: fix RSM when PCID is non-zero
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-rsm_load_state_64() and rsm_enter_protected_mode() load CR3, then
-CR4 & ~PCIDE, then CR0, then CR4.
-
-However, setting CR4.PCIDE fails if CR3[11:0] != 0.  It's probably easier
-in the long run to replace rsm_enter_protected_mode() with an emulator
-callback that sets all the special registers (like KVM_SET_SREGS would
-do).  For now, set the PCID field of CR3 only after CR4.PCIDE is 1.
-
-Reported-by: Laszlo Ersek <lersek@redhat.com>
-Tested-by: Laszlo Ersek <lersek@redhat.com>
-Fixes: 660a5d517aaab9187f93854425c4c63f4a09195c
-Cc: stable@vger.kernel.org
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-(cherry picked from commit fae1a3e775cca8c3a9e0eb34443b310871a15a92)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit dba4ceb9a91ed2d11a47722436b3c0be15e791d4)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kvm/emulate.c | 32 +++++++++++++++++++++++++-------
- 1 file changed, 25 insertions(+), 7 deletions(-)
-
-diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
-index fb0055953fbc..155f2af2cb39 100644
---- a/arch/x86/kvm/emulate.c
-+++ b/arch/x86/kvm/emulate.c
-@@ -2399,9 +2399,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
- }
- 
- static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
--                                   u64 cr0, u64 cr4)
-+                                  u64 cr0, u64 cr3, u64 cr4)
- {
-       int bad;
-+      u64 pcid;
-+
-+      /* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
-+      pcid = 0;
-+      if (cr4 & X86_CR4_PCIDE) {
-+              pcid = cr3 & 0xfff;
-+              cr3 &= ~0xfff;
-+      }
-+
-+      bad = ctxt->ops->set_cr(ctxt, 3, cr3);
-+      if (bad)
-+              return X86EMUL_UNHANDLEABLE;
- 
-       /*
-        * First enable PAE, long mode needs it before CR0.PG = 1 is set.
-@@ -2420,6 +2432,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
-               bad = ctxt->ops->set_cr(ctxt, 4, cr4);
-               if (bad)
-                       return X86EMUL_UNHANDLEABLE;
-+              if (pcid) {
-+                      bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
-+                      if (bad)
-+                              return X86EMUL_UNHANDLEABLE;
-+              }
-+
-       }
- 
-       return X86EMUL_CONTINUE;
-@@ -2430,11 +2448,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
-       struct desc_struct desc;
-       struct desc_ptr dt;
-       u16 selector;
--      u32 val, cr0, cr4;
-+      u32 val, cr0, cr3, cr4;
-       int i;
- 
-       cr0 =                      GET_SMSTATE(u32, smbase, 0x7ffc);
--      ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
-+      cr3 =                      GET_SMSTATE(u32, smbase, 0x7ff8);
-       ctxt->eflags =             GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
-       ctxt->_eip =               GET_SMSTATE(u32, smbase, 0x7ff0);
- 
-@@ -2476,14 +2494,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
- 
-       ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
- 
--      return rsm_enter_protected_mode(ctxt, cr0, cr4);
-+      return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
- }
- 
- static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
- {
-       struct desc_struct desc;
-       struct desc_ptr dt;
--      u64 val, cr0, cr4;
-+      u64 val, cr0, cr3, cr4;
-       u32 base3;
-       u16 selector;
-       int i, r;
-@@ -2500,7 +2518,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
-       ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
- 
-       cr0 =                       GET_SMSTATE(u64, smbase, 0x7f58);
--      ctxt->ops->set_cr(ctxt, 3,  GET_SMSTATE(u64, smbase, 0x7f50));
-+      cr3 =                       GET_SMSTATE(u64, smbase, 0x7f50);
-       cr4 =                       GET_SMSTATE(u64, smbase, 0x7f48);
-       ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
-       val =                       GET_SMSTATE(u64, smbase, 0x7ed0);
-@@ -2528,7 +2546,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
-       dt.address =                GET_SMSTATE(u64, smbase, 0x7e68);
-       ctxt->ops->set_gdt(ctxt, &dt);
- 
--      r = rsm_enter_protected_mode(ctxt, cr0, cr4);
-+      r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
-       if (r != X86EMUL_CONTINUE)
-               return r;
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0227-kvm-x86-fix-RSM-when-PCID-is-non-zero.patch b/patches/kernel/0227-kvm-x86-fix-RSM-when-PCID-is-non-zero.patch

new file mode 100644 (file)

index 0000000..e83e451
--- /dev/null
+++ b/patches/kernel/0227-kvm-x86-fix-RSM-when-PCID-is-non-zero.patch
@@ -0,0 +1,124 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 21 Dec 2017 00:49:14 +0100
+Subject: [PATCH] kvm: x86: fix RSM when PCID is non-zero
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+rsm_load_state_64() and rsm_enter_protected_mode() load CR3, then
+CR4 & ~PCIDE, then CR0, then CR4.
+
+However, setting CR4.PCIDE fails if CR3[11:0] != 0.  It's probably easier
+in the long run to replace rsm_enter_protected_mode() with an emulator
+callback that sets all the special registers (like KVM_SET_SREGS would
+do).  For now, set the PCID field of CR3 only after CR4.PCIDE is 1.
+
+Reported-by: Laszlo Ersek <lersek@redhat.com>
+Tested-by: Laszlo Ersek <lersek@redhat.com>
+Fixes: 660a5d517aaab9187f93854425c4c63f4a09195c
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit fae1a3e775cca8c3a9e0eb34443b310871a15a92)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit dba4ceb9a91ed2d11a47722436b3c0be15e791d4)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/emulate.c | 32 +++++++++++++++++++++++++-------
+ 1 file changed, 25 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
+index fb0055953fbc..155f2af2cb39 100644
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -2399,9 +2399,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
+ }
+ 
+ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
+-                                   u64 cr0, u64 cr4)
++                                  u64 cr0, u64 cr3, u64 cr4)
+ {
+       int bad;
++      u64 pcid;
++
++      /* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
++      pcid = 0;
++      if (cr4 & X86_CR4_PCIDE) {
++              pcid = cr3 & 0xfff;
++              cr3 &= ~0xfff;
++      }
++
++      bad = ctxt->ops->set_cr(ctxt, 3, cr3);
++      if (bad)
++              return X86EMUL_UNHANDLEABLE;
+ 
+       /*
+        * First enable PAE, long mode needs it before CR0.PG = 1 is set.
+@@ -2420,6 +2432,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
+               bad = ctxt->ops->set_cr(ctxt, 4, cr4);
+               if (bad)
+                       return X86EMUL_UNHANDLEABLE;
++              if (pcid) {
++                      bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
++                      if (bad)
++                              return X86EMUL_UNHANDLEABLE;
++              }
++
+       }
+ 
+       return X86EMUL_CONTINUE;
+@@ -2430,11 +2448,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
+       struct desc_struct desc;
+       struct desc_ptr dt;
+       u16 selector;
+-      u32 val, cr0, cr4;
++      u32 val, cr0, cr3, cr4;
+       int i;
+ 
+       cr0 =                      GET_SMSTATE(u32, smbase, 0x7ffc);
+-      ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
++      cr3 =                      GET_SMSTATE(u32, smbase, 0x7ff8);
+       ctxt->eflags =             GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
+       ctxt->_eip =               GET_SMSTATE(u32, smbase, 0x7ff0);
+ 
+@@ -2476,14 +2494,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
+ 
+       ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
+ 
+-      return rsm_enter_protected_mode(ctxt, cr0, cr4);
++      return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+ }
+ 
+ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
+ {
+       struct desc_struct desc;
+       struct desc_ptr dt;
+-      u64 val, cr0, cr4;
++      u64 val, cr0, cr3, cr4;
+       u32 base3;
+       u16 selector;
+       int i, r;
+@@ -2500,7 +2518,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
+       ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
+ 
+       cr0 =                       GET_SMSTATE(u64, smbase, 0x7f58);
+-      ctxt->ops->set_cr(ctxt, 3,  GET_SMSTATE(u64, smbase, 0x7f50));
++      cr3 =                       GET_SMSTATE(u64, smbase, 0x7f50);
+       cr4 =                       GET_SMSTATE(u64, smbase, 0x7f48);
+       ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
+       val =                       GET_SMSTATE(u64, smbase, 0x7ed0);
+@@ -2528,7 +2546,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
+       dt.address =                GET_SMSTATE(u64, smbase, 0x7e68);
+       ctxt->ops->set_gdt(ctxt, &dt);
+ 
+-      r = rsm_enter_protected_mode(ctxt, cr0, cr4);
++      r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+       if (r != X86EMUL_CONTINUE)
+               return r;
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0227-x86-pti-Switch-to-kernel-CR3-at-early-in-entry_SYSCA.patch b/patches/kernel/0227-x86-pti-Switch-to-kernel-CR3-at-early-in-entry_SYSCA.patch

deleted file mode 100644 (file)

index 547c382..0000000
--- a/patches/kernel/0227-x86-pti-Switch-to-kernel-CR3-at-early-in-entry_SYSCA.patch
+++ /dev/null
@@ -1,78 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 3 Jan 2018 19:52:04 +0100
-Subject: [PATCH] x86/pti: Switch to kernel CR3 at early in
- entry_SYSCALL_compat()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The preparation for PTI which added CR3 switching to the entry code
-misplaced the CR3 switch in entry_SYSCALL_compat().
-
-With PTI enabled the entry code tries to access a per cpu variable after
-switching to kernel GS. This fails because that variable is not mapped to
-user space. This results in a double fault and in the worst case a kernel
-crash.
-
-Move the switch ahead of the access and clobber RSP which has been saved
-already.
-
-Fixes: 8a09317b895f ("x86/mm/pti: Prepare the x86/entry assembly code for entry/exit CR3 switching")
-Reported-by: Lars Wendler <wendler.lars@web.de>
-Reported-by: Laura Abbott <labbott@redhat.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Borislav Betkov <bp@alien8.de>
-Cc: Andy Lutomirski <luto@kernel.org>,
-Cc: Dave Hansen <dave.hansen@linux.intel.com>,
-Cc: Peter Zijlstra <peterz@infradead.org>,
-Cc: Greg KH <gregkh@linuxfoundation.org>, ,
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>,
-Cc: Juergen Gross <jgross@suse.com>
-Cc: stable@vger.kernel.org
-Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801031949200.1957@nanos
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 2f45cd7a57da0a4d7f3a91a5f577c76b9ed9eb8a)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64_compat.S | 13 ++++++-------
- 1 file changed, 6 insertions(+), 7 deletions(-)
-
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
-index 973527e34887..2b5e7685823c 100644
---- a/arch/x86/entry/entry_64_compat.S
-+++ b/arch/x86/entry/entry_64_compat.S
-@@ -189,8 +189,13 @@ ENTRY(entry_SYSCALL_compat)
-       /* Interrupts are off on entry. */
-       swapgs
- 
--      /* Stash user ESP and switch to the kernel stack. */
-+      /* Stash user ESP.*/
-       movl    %esp, %r8d
-+
-+      /* Use %rsp as scratch reg. User ESP is stashed in r8 */
-+      SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
-+
-+      /* Switch to the kernel stack */
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- 
-       /* Construct struct pt_regs on stack */
-@@ -218,12 +223,6 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
-       pushq   $0                      /* pt_regs->r14 = 0 */
-       pushq   $0                      /* pt_regs->r15 = 0 */
- 
--      /*
--       * We just saved %rdi so it is safe to clobber.  It is not
--       * preserved during the C calls inside TRACE_IRQS_OFF anyway.
--       */
--      SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
--
-       /*
-        * User mode is traced as though IRQs are on, and SYSENTER
-        * turned them off.
--- 
-2.14.2
-
diff --git a/patches/kernel/0228-UBUNTU-SAUCE-bpf-reject-out-of-bounds-stack-pointer-.patch b/patches/kernel/0228-UBUNTU-SAUCE-bpf-reject-out-of-bounds-stack-pointer-.patch

deleted file mode 100644 (file)

index 802ae9f..0000000
--- a/patches/kernel/0228-UBUNTU-SAUCE-bpf-reject-out-of-bounds-stack-pointer-.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Jann Horn <jannh@google.com>
-Date: Thu, 4 Jan 2018 08:01:21 -0600
-Subject: [PATCH] UBUNTU: SAUCE: bpf: reject out-of-bounds stack pointer
- calculation
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Reject programs that compute wildly out-of-bounds stack pointers.
-Otherwise, pointers can be computed with an offset that doesn't fit into an
-`int`, causing security issues in the stack memory access check (as well as
-signed integer overflow during offset addition).
-
-This is a fix specifically for the v4.9 stable tree because the mainline
-code looks very different at this point.
-
-Fixes: 7bca0a9702edf ("bpf: enhance verifier to understand stack pointer arithmetic")
-Signed-off-by: Jann Horn <jannh@google.com>
-Acked-by: Daniel Borkmann <daniel@iogearbox.net>
-CVE-2017-17863
-Link: https://www.spinics.net/lists/stable/msg206985.html
-Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 1c26ffd0e9b24d512824cabc6687a14d4777d0f3)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- kernel/bpf/verifier.c | 22 ++++++++++++++++++++--
- 1 file changed, 20 insertions(+), 2 deletions(-)
-
-diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
-index 3940019b9740..4321625fe32a 100644
---- a/kernel/bpf/verifier.c
-+++ b/kernel/bpf/verifier.c
-@@ -2122,10 +2122,28 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
-                          ((BPF_SRC(insn->code) == BPF_X &&
-                            regs[insn->src_reg].type == CONST_IMM) ||
-                           BPF_SRC(insn->code) == BPF_K)) {
--                      if (BPF_SRC(insn->code) == BPF_X)
-+                      if (BPF_SRC(insn->code) == BPF_X) {
-+                              /* check in case the register contains a big
-+                               * 64-bit value
-+                               */
-+                              if (regs[insn->src_reg].imm < -MAX_BPF_STACK ||
-+                                  regs[insn->src_reg].imm > MAX_BPF_STACK) {
-+                                      verbose("R%d value too big in R%d pointer arithmetic\n",
-+                                              insn->src_reg, insn->dst_reg);
-+                                      return -EACCES;
-+                              }
-                               dst_reg->imm += regs[insn->src_reg].imm;
--                      else
-+                      } else {
-+                              /* safe against overflow: addition of 32-bit
-+                               * numbers in 64-bit representation
-+                               */
-                               dst_reg->imm += insn->imm;
-+                      }
-+                      if (dst_reg->imm > 0 || dst_reg->imm < -MAX_BPF_STACK) {
-+                              verbose("R%d out-of-bounds pointer arithmetic\n",
-+                                      insn->dst_reg);
-+                              return -EACCES;
-+                      }
-                       return 0;
-               } else if (opcode == BPF_ADD &&
-                          BPF_CLASS(insn->code) == BPF_ALU64 &&
--- 
-2.14.2
-
diff --git a/patches/kernel/0228-x86-pti-Switch-to-kernel-CR3-at-early-in-entry_SYSCA.patch b/patches/kernel/0228-x86-pti-Switch-to-kernel-CR3-at-early-in-entry_SYSCA.patch

new file mode 100644 (file)

index 0000000..547c382
--- /dev/null
+++ b/patches/kernel/0228-x86-pti-Switch-to-kernel-CR3-at-early-in-entry_SYSCA.patch
@@ -0,0 +1,78 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 3 Jan 2018 19:52:04 +0100
+Subject: [PATCH] x86/pti: Switch to kernel CR3 at early in
+ entry_SYSCALL_compat()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The preparation for PTI which added CR3 switching to the entry code
+misplaced the CR3 switch in entry_SYSCALL_compat().
+
+With PTI enabled the entry code tries to access a per cpu variable after
+switching to kernel GS. This fails because that variable is not mapped to
+user space. This results in a double fault and in the worst case a kernel
+crash.
+
+Move the switch ahead of the access and clobber RSP which has been saved
+already.
+
+Fixes: 8a09317b895f ("x86/mm/pti: Prepare the x86/entry assembly code for entry/exit CR3 switching")
+Reported-by: Lars Wendler <wendler.lars@web.de>
+Reported-by: Laura Abbott <labbott@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Betkov <bp@alien8.de>
+Cc: Andy Lutomirski <luto@kernel.org>,
+Cc: Dave Hansen <dave.hansen@linux.intel.com>,
+Cc: Peter Zijlstra <peterz@infradead.org>,
+Cc: Greg KH <gregkh@linuxfoundation.org>, ,
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>,
+Cc: Juergen Gross <jgross@suse.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801031949200.1957@nanos
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 2f45cd7a57da0a4d7f3a91a5f577c76b9ed9eb8a)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64_compat.S | 13 ++++++-------
+ 1 file changed, 6 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index 973527e34887..2b5e7685823c 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -189,8 +189,13 @@ ENTRY(entry_SYSCALL_compat)
+       /* Interrupts are off on entry. */
+       swapgs
+ 
+-      /* Stash user ESP and switch to the kernel stack. */
++      /* Stash user ESP.*/
+       movl    %esp, %r8d
++
++      /* Use %rsp as scratch reg. User ESP is stashed in r8 */
++      SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
++
++      /* Switch to the kernel stack */
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ 
+       /* Construct struct pt_regs on stack */
+@@ -218,12 +223,6 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
+       pushq   $0                      /* pt_regs->r14 = 0 */
+       pushq   $0                      /* pt_regs->r15 = 0 */
+ 
+-      /*
+-       * We just saved %rdi so it is safe to clobber.  It is not
+-       * preserved during the C calls inside TRACE_IRQS_OFF anyway.
+-       */
+-      SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+-
+       /*
+        * User mode is traced as though IRQs are on, and SYSENTER
+        * turned them off.
+-- 
+2.14.2
+
diff --git a/patches/kernel/0229-UBUNTU-SAUCE-bpf-reject-out-of-bounds-stack-pointer-.patch b/patches/kernel/0229-UBUNTU-SAUCE-bpf-reject-out-of-bounds-stack-pointer-.patch

new file mode 100644 (file)

index 0000000..802ae9f
--- /dev/null
+++ b/patches/kernel/0229-UBUNTU-SAUCE-bpf-reject-out-of-bounds-stack-pointer-.patch
@@ -0,0 +1,69 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Thu, 4 Jan 2018 08:01:21 -0600
+Subject: [PATCH] UBUNTU: SAUCE: bpf: reject out-of-bounds stack pointer
+ calculation
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Reject programs that compute wildly out-of-bounds stack pointers.
+Otherwise, pointers can be computed with an offset that doesn't fit into an
+`int`, causing security issues in the stack memory access check (as well as
+signed integer overflow during offset addition).
+
+This is a fix specifically for the v4.9 stable tree because the mainline
+code looks very different at this point.
+
+Fixes: 7bca0a9702edf ("bpf: enhance verifier to understand stack pointer arithmetic")
+Signed-off-by: Jann Horn <jannh@google.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+CVE-2017-17863
+Link: https://www.spinics.net/lists/stable/msg206985.html
+Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 1c26ffd0e9b24d512824cabc6687a14d4777d0f3)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ kernel/bpf/verifier.c | 22 ++++++++++++++++++++--
+ 1 file changed, 20 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 3940019b9740..4321625fe32a 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -2122,10 +2122,28 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
+                          ((BPF_SRC(insn->code) == BPF_X &&
+                            regs[insn->src_reg].type == CONST_IMM) ||
+                           BPF_SRC(insn->code) == BPF_K)) {
+-                      if (BPF_SRC(insn->code) == BPF_X)
++                      if (BPF_SRC(insn->code) == BPF_X) {
++                              /* check in case the register contains a big
++                               * 64-bit value
++                               */
++                              if (regs[insn->src_reg].imm < -MAX_BPF_STACK ||
++                                  regs[insn->src_reg].imm > MAX_BPF_STACK) {
++                                      verbose("R%d value too big in R%d pointer arithmetic\n",
++                                              insn->src_reg, insn->dst_reg);
++                                      return -EACCES;
++                              }
+                               dst_reg->imm += regs[insn->src_reg].imm;
+-                      else
++                      } else {
++                              /* safe against overflow: addition of 32-bit
++                               * numbers in 64-bit representation
++                               */
+                               dst_reg->imm += insn->imm;
++                      }
++                      if (dst_reg->imm > 0 || dst_reg->imm < -MAX_BPF_STACK) {
++                              verbose("R%d out-of-bounds pointer arithmetic\n",
++                                      insn->dst_reg);
++                              return -EACCES;
++                      }
+                       return 0;
+               } else if (opcode == BPF_ADD &&
+                          BPF_CLASS(insn->code) == BPF_ALU64 &&
+-- 
+2.14.2
+
diff --git a/patches/kernel/0229-bpf-fix-incorrect-sign-extension-in-check_alu_op.patch b/patches/kernel/0229-bpf-fix-incorrect-sign-extension-in-check_alu_op.patch

deleted file mode 100644 (file)

index 9934332..0000000
--- a/patches/kernel/0229-bpf-fix-incorrect-sign-extension-in-check_alu_op.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Jann Horn <jannh@google.com>
-Date: Thu, 4 Jan 2018 08:01:22 -0600
-Subject: [PATCH] bpf: fix incorrect sign extension in check_alu_op()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-[ Upstream commit 95a762e2c8c942780948091f8f2a4f32fce1ac6f ]
-
-Distinguish between
-BPF_ALU64|BPF_MOV|BPF_K (load 32-bit immediate, sign-extended to 64-bit)
-and BPF_ALU|BPF_MOV|BPF_K (load 32-bit immediate, zero-padded to 64-bit);
-only perform sign extension in the first case.
-
-Starting with v4.14, this is exploitable by unprivileged users as long as
-the unprivileged_bpf_disabled sysctl isn't set.
-
-Debian assigned CVE-2017-16995 for this issue.
-
-v3:
- - add CVE number (Ben Hutchings)
-
-Fixes: 484611357c19 ("bpf: allow access into map value arrays")
-Signed-off-by: Jann Horn <jannh@google.com>
-Acked-by: Edward Cree <ecree@solarflare.com>
-Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-CVE-2017-16995
-Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 868c88129c7567525dbde3cb6989a5acd478bd80)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- kernel/bpf/verifier.c | 15 +++++++++++----
- 1 file changed, 11 insertions(+), 4 deletions(-)
-
-diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
-index 4321625fe32a..cdfa07a4ef27 100644
---- a/kernel/bpf/verifier.c
-+++ b/kernel/bpf/verifier.c
-@@ -2048,12 +2048,19 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
-                       /* case: R = imm
-                        * remember the value we stored into this reg
-                        */
-+                      u64 imm;
-+
-+                      if (BPF_CLASS(insn->code) == BPF_ALU64)
-+                              imm = insn->imm;
-+                      else
-+                              imm = (u32)insn->imm;
-+
-                       regs[insn->dst_reg].type = CONST_IMM;
--                      regs[insn->dst_reg].imm = insn->imm;
-+                      regs[insn->dst_reg].imm = imm;
-                       regs[insn->dst_reg].id = 0;
--                      regs[insn->dst_reg].max_value = insn->imm;
--                      regs[insn->dst_reg].min_value = insn->imm;
--                      regs[insn->dst_reg].min_align = calc_align(insn->imm);
-+                      regs[insn->dst_reg].max_value = imm;
-+                      regs[insn->dst_reg].min_value = imm;
-+                      regs[insn->dst_reg].min_align = calc_align(imm);
-                       regs[insn->dst_reg].value_from_signed = false;
-               }
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0230-UBUNTU-SAUCE-bpf-verifier-Fix-states_equal-compariso.patch b/patches/kernel/0230-UBUNTU-SAUCE-bpf-verifier-Fix-states_equal-compariso.patch

deleted file mode 100644 (file)

index 58d7259..0000000
--- a/patches/kernel/0230-UBUNTU-SAUCE-bpf-verifier-Fix-states_equal-compariso.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Ben Hutchings <ben@decadent.org.uk>
-Date: Thu, 4 Jan 2018 08:01:23 -0600
-Subject: [PATCH] UBUNTU: SAUCE: bpf/verifier: Fix states_equal() comparison of
- pointer and UNKNOWN
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-An UNKNOWN_VALUE is not supposed to be derived from a pointer, unless
-pointer leaks are allowed.  Therefore, states_equal() must not treat
-a state with a pointer in a register as "equal" to a state with an
-UNKNOWN_VALUE in that register.
-
-This was fixed differently upstream, but the code around here was
-largely rewritten in 4.14 by commit f1174f77b50c "bpf/verifier: rework
-value tracking".  The bug can be detected by the bpf/verifier sub-test
-"pointer/scalar confusion in state equality check (way 1)".
-
-Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
-Cc: Edward Cree <ecree@solarflare.com>
-Cc: Jann Horn <jannh@google.com>
-Cc: Alexei Starovoitov <ast@kernel.org>
-CVE-2017-17864
-Link: https://anonscm.debian.org/cgit/kernel/linux.git/tree/debian/patches/bugfix/all/bpf-verifier-fix-states_equal-comparison-of-pointer-and-unknown.patch?h=stretch-security
-Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 3fb4378083def9b22f6ae222e75d880fc5c59048)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- kernel/bpf/verifier.c | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
-index cdfa07a4ef27..4ecb2e10c5e0 100644
---- a/kernel/bpf/verifier.c
-+++ b/kernel/bpf/verifier.c
-@@ -2980,11 +2980,12 @@ static bool states_equal(struct bpf_verifier_env *env,
- 
-               /* If we didn't map access then again we don't care about the
-                * mismatched range values and it's ok if our old type was
--               * UNKNOWN and we didn't go to a NOT_INIT'ed reg.
-+               * UNKNOWN and we didn't go to a NOT_INIT'ed or pointer reg.
-                */
-               if (rold->type == NOT_INIT ||
-                   (!varlen_map_access && rold->type == UNKNOWN_VALUE &&
--                   rcur->type != NOT_INIT))
-+                   rcur->type != NOT_INIT &&
-+                   !__is_pointer_value(env->allow_ptr_leaks, rcur)))
-                       continue;
- 
-               /* Don't care about the reg->id in this case. */
--- 
-2.14.2
-
diff --git a/patches/kernel/0230-bpf-fix-incorrect-sign-extension-in-check_alu_op.patch b/patches/kernel/0230-bpf-fix-incorrect-sign-extension-in-check_alu_op.patch

new file mode 100644 (file)

index 0000000..9934332
--- /dev/null
+++ b/patches/kernel/0230-bpf-fix-incorrect-sign-extension-in-check_alu_op.patch
@@ -0,0 +1,69 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Thu, 4 Jan 2018 08:01:22 -0600
+Subject: [PATCH] bpf: fix incorrect sign extension in check_alu_op()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+[ Upstream commit 95a762e2c8c942780948091f8f2a4f32fce1ac6f ]
+
+Distinguish between
+BPF_ALU64|BPF_MOV|BPF_K (load 32-bit immediate, sign-extended to 64-bit)
+and BPF_ALU|BPF_MOV|BPF_K (load 32-bit immediate, zero-padded to 64-bit);
+only perform sign extension in the first case.
+
+Starting with v4.14, this is exploitable by unprivileged users as long as
+the unprivileged_bpf_disabled sysctl isn't set.
+
+Debian assigned CVE-2017-16995 for this issue.
+
+v3:
+ - add CVE number (Ben Hutchings)
+
+Fixes: 484611357c19 ("bpf: allow access into map value arrays")
+Signed-off-by: Jann Horn <jannh@google.com>
+Acked-by: Edward Cree <ecree@solarflare.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+CVE-2017-16995
+Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 868c88129c7567525dbde3cb6989a5acd478bd80)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ kernel/bpf/verifier.c | 15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 4321625fe32a..cdfa07a4ef27 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -2048,12 +2048,19 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
+                       /* case: R = imm
+                        * remember the value we stored into this reg
+                        */
++                      u64 imm;
++
++                      if (BPF_CLASS(insn->code) == BPF_ALU64)
++                              imm = insn->imm;
++                      else
++                              imm = (u32)insn->imm;
++
+                       regs[insn->dst_reg].type = CONST_IMM;
+-                      regs[insn->dst_reg].imm = insn->imm;
++                      regs[insn->dst_reg].imm = imm;
+                       regs[insn->dst_reg].id = 0;
+-                      regs[insn->dst_reg].max_value = insn->imm;
+-                      regs[insn->dst_reg].min_value = insn->imm;
+-                      regs[insn->dst_reg].min_align = calc_align(insn->imm);
++                      regs[insn->dst_reg].max_value = imm;
++                      regs[insn->dst_reg].min_value = imm;
++                      regs[insn->dst_reg].min_align = calc_align(imm);
+                       regs[insn->dst_reg].value_from_signed = false;
+               }
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0231-UBUNTU-SAUCE-bpf-verifier-Fix-states_equal-compariso.patch b/patches/kernel/0231-UBUNTU-SAUCE-bpf-verifier-Fix-states_equal-compariso.patch

new file mode 100644 (file)

index 0000000..58d7259
--- /dev/null
+++ b/patches/kernel/0231-UBUNTU-SAUCE-bpf-verifier-Fix-states_equal-compariso.patch
@@ -0,0 +1,56 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Thu, 4 Jan 2018 08:01:23 -0600
+Subject: [PATCH] UBUNTU: SAUCE: bpf/verifier: Fix states_equal() comparison of
+ pointer and UNKNOWN
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+An UNKNOWN_VALUE is not supposed to be derived from a pointer, unless
+pointer leaks are allowed.  Therefore, states_equal() must not treat
+a state with a pointer in a register as "equal" to a state with an
+UNKNOWN_VALUE in that register.
+
+This was fixed differently upstream, but the code around here was
+largely rewritten in 4.14 by commit f1174f77b50c "bpf/verifier: rework
+value tracking".  The bug can be detected by the bpf/verifier sub-test
+"pointer/scalar confusion in state equality check (way 1)".
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Cc: Edward Cree <ecree@solarflare.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Alexei Starovoitov <ast@kernel.org>
+CVE-2017-17864
+Link: https://anonscm.debian.org/cgit/kernel/linux.git/tree/debian/patches/bugfix/all/bpf-verifier-fix-states_equal-comparison-of-pointer-and-unknown.patch?h=stretch-security
+Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 3fb4378083def9b22f6ae222e75d880fc5c59048)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ kernel/bpf/verifier.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index cdfa07a4ef27..4ecb2e10c5e0 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -2980,11 +2980,12 @@ static bool states_equal(struct bpf_verifier_env *env,
+ 
+               /* If we didn't map access then again we don't care about the
+                * mismatched range values and it's ok if our old type was
+-               * UNKNOWN and we didn't go to a NOT_INIT'ed reg.
++               * UNKNOWN and we didn't go to a NOT_INIT'ed or pointer reg.
+                */
+               if (rold->type == NOT_INIT ||
+                   (!varlen_map_access && rold->type == UNKNOWN_VALUE &&
+-                   rcur->type != NOT_INIT))
++                   rcur->type != NOT_INIT &&
++                   !__is_pointer_value(env->allow_ptr_leaks, rcur)))
+                       continue;
+ 
+               /* Don't care about the reg->id in this case. */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0231-bpf-fix-branch-pruning-logic.patch b/patches/kernel/0231-bpf-fix-branch-pruning-logic.patch

deleted file mode 100644 (file)

index 977e60a..0000000
--- a/patches/kernel/0231-bpf-fix-branch-pruning-logic.patch
+++ /dev/null
@@ -1,129 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Alexei Starovoitov <ast@fb.com>
-Date: Thu, 4 Jan 2018 08:01:24 -0600
-Subject: [PATCH] bpf: fix branch pruning logic
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-when the verifier detects that register contains a runtime constant
-and it's compared with another constant it will prune exploration
-of the branch that is guaranteed not to be taken at runtime.
-This is all correct, but malicious program may be constructed
-in such a way that it always has a constant comparison and
-the other branch is never taken under any conditions.
-In this case such path through the program will not be explored
-by the verifier. It won't be taken at run-time either, but since
-all instructions are JITed the malicious program may cause JITs
-to complain about using reserved fields, etc.
-To fix the issue we have to track the instructions explored by
-the verifier and sanitize instructions that are dead at run time
-with NOPs. We cannot reject such dead code, since llvm generates
-it for valid C code, since it doesn't do as much data flow
-analysis as the verifier does.
-
-Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
-Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-Acked-by: Daniel Borkmann <daniel@iogearbox.net>
-Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-(cherry picked from commit c131187db2d3fa2f8bf32fdf4e9a4ef805168467)
-CVE-2017-17862
-Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 2df70878d072d06f5bad0db3f2ee1ed47179dff8)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- include/linux/bpf_verifier.h |  2 +-
- kernel/bpf/verifier.c        | 27 +++++++++++++++++++++++++++
- 2 files changed, 28 insertions(+), 1 deletion(-)
-
-diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
-index 8e5d31f6faef..effeaa64257d 100644
---- a/include/linux/bpf_verifier.h
-+++ b/include/linux/bpf_verifier.h
-@@ -75,7 +75,7 @@ struct bpf_insn_aux_data {
-               struct bpf_map *map_ptr;        /* pointer for call insn into lookup_elem */
-       };
-       int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
--      int converted_op_size; /* the valid value width after perceived conversion */
-+      bool seen; /* this insn was processed by the verifier */
- };
- 
- #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
-diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
-index 4ecb2e10c5e0..dab5ba668b97 100644
---- a/kernel/bpf/verifier.c
-+++ b/kernel/bpf/verifier.c
-@@ -3152,6 +3152,7 @@ static int do_check(struct bpf_verifier_env *env)
-               if (err)
-                       return err;
- 
-+              env->insn_aux_data[insn_idx].seen = true;
-               if (class == BPF_ALU || class == BPF_ALU64) {
-                       err = check_alu_op(env, insn);
-                       if (err)
-@@ -3342,6 +3343,7 @@ static int do_check(struct bpf_verifier_env *env)
-                                       return err;
- 
-                               insn_idx++;
-+                              env->insn_aux_data[insn_idx].seen = true;
-                       } else {
-                               verbose("invalid BPF_LD mode\n");
-                               return -EINVAL;
-@@ -3523,6 +3525,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
-                               u32 off, u32 cnt)
- {
-       struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
-+      int i;
- 
-       if (cnt == 1)
-               return 0;
-@@ -3532,6 +3535,8 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
-       memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
-       memcpy(new_data + off + cnt - 1, old_data + off,
-              sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
-+      for (i = off; i < off + cnt - 1; i++)
-+              new_data[i].seen = true;
-       env->insn_aux_data = new_data;
-       vfree(old_data);
-       return 0;
-@@ -3550,6 +3555,25 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
-       return new_prog;
- }
- 
-+/* The verifier does more data flow analysis than llvm and will not explore
-+ * branches that are dead at run time. Malicious programs can have dead code
-+ * too. Therefore replace all dead at-run-time code with nops.
-+ */
-+static void sanitize_dead_code(struct bpf_verifier_env *env)
-+{
-+      struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
-+      struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0);
-+      struct bpf_insn *insn = env->prog->insnsi;
-+      const int insn_cnt = env->prog->len;
-+      int i;
-+
-+      for (i = 0; i < insn_cnt; i++) {
-+              if (aux_data[i].seen)
-+                      continue;
-+              memcpy(insn + i, &nop, sizeof(nop));
-+      }
-+}
-+
- /* convert load instructions that access fields of 'struct __sk_buff'
-  * into sequence of instructions that access fields of 'struct sk_buff'
-  */
-@@ -3841,6 +3865,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
-       while (pop_stack(env, NULL) >= 0);
-       free_states(env);
- 
-+      if (ret == 0)
-+              sanitize_dead_code(env);
-+
-       if (ret == 0)
-               /* program is valid, convert *(u32*)(ctx + off) accesses */
-               ret = convert_ctx_accesses(env);
--- 
-2.14.2
-
diff --git a/patches/kernel/0232-UBUNTU-SAUCE-only-attempt-to-use-PCID-in-64-bit-buil.patch b/patches/kernel/0232-UBUNTU-SAUCE-only-attempt-to-use-PCID-in-64-bit-buil.patch

deleted file mode 100644 (file)

index cbc69e6..0000000
--- a/patches/kernel/0232-UBUNTU-SAUCE-only-attempt-to-use-PCID-in-64-bit-buil.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andy Whitcroft <apw@canonical.com>
-Date: Wed, 20 Dec 2017 13:33:50 +0000
-Subject: [PATCH] UBUNTU: SAUCE: only attempt to use PCID in 64 bit builds
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-[apw@canonical.com: need to review if this is still needed with the
-latest patches.]
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit babace1d1ac19075498675cd787500cfa24d2b55)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/smpboot.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
-index 6ad8391b9866..398e8324fea4 100644
---- a/arch/x86/kernel/smpboot.c
-+++ b/arch/x86/kernel/smpboot.c
-@@ -222,7 +222,7 @@ static void notrace start_secondary(void *unused)
-        * before cpu_init(), SMP booting is too fragile that we want to
-        * limit the things done here to the most necessary things.
-        */
--      if (boot_cpu_has(X86_FEATURE_PCID))
-+      if (IS_ENABLED(CONFIG_X86_64) && boot_cpu_has(X86_FEATURE_PCID))
-               __write_cr4(__read_cr4() | X86_CR4_PCIDE);
-       cpu_init();
-       x86_cpuinit.early_percpu_clock_init();
--- 
-2.14.2
-
diff --git a/patches/kernel/0232-bpf-fix-branch-pruning-logic.patch b/patches/kernel/0232-bpf-fix-branch-pruning-logic.patch

new file mode 100644 (file)

index 0000000..977e60a
--- /dev/null
+++ b/patches/kernel/0232-bpf-fix-branch-pruning-logic.patch
@@ -0,0 +1,129 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Alexei Starovoitov <ast@fb.com>
+Date: Thu, 4 Jan 2018 08:01:24 -0600
+Subject: [PATCH] bpf: fix branch pruning logic
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+when the verifier detects that register contains a runtime constant
+and it's compared with another constant it will prune exploration
+of the branch that is guaranteed not to be taken at runtime.
+This is all correct, but malicious program may be constructed
+in such a way that it always has a constant comparison and
+the other branch is never taken under any conditions.
+In this case such path through the program will not be explored
+by the verifier. It won't be taken at run-time either, but since
+all instructions are JITed the malicious program may cause JITs
+to complain about using reserved fields, etc.
+To fix the issue we have to track the instructions explored by
+the verifier and sanitize instructions that are dead at run time
+with NOPs. We cannot reject such dead code, since llvm generates
+it for valid C code, since it doesn't do as much data flow
+analysis as the verifier does.
+
+Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+(cherry picked from commit c131187db2d3fa2f8bf32fdf4e9a4ef805168467)
+CVE-2017-17862
+Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 2df70878d072d06f5bad0db3f2ee1ed47179dff8)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ include/linux/bpf_verifier.h |  2 +-
+ kernel/bpf/verifier.c        | 27 +++++++++++++++++++++++++++
+ 2 files changed, 28 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
+index 8e5d31f6faef..effeaa64257d 100644
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -75,7 +75,7 @@ struct bpf_insn_aux_data {
+               struct bpf_map *map_ptr;        /* pointer for call insn into lookup_elem */
+       };
+       int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
+-      int converted_op_size; /* the valid value width after perceived conversion */
++      bool seen; /* this insn was processed by the verifier */
+ };
+ 
+ #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 4ecb2e10c5e0..dab5ba668b97 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -3152,6 +3152,7 @@ static int do_check(struct bpf_verifier_env *env)
+               if (err)
+                       return err;
+ 
++              env->insn_aux_data[insn_idx].seen = true;
+               if (class == BPF_ALU || class == BPF_ALU64) {
+                       err = check_alu_op(env, insn);
+                       if (err)
+@@ -3342,6 +3343,7 @@ static int do_check(struct bpf_verifier_env *env)
+                                       return err;
+ 
+                               insn_idx++;
++                              env->insn_aux_data[insn_idx].seen = true;
+                       } else {
+                               verbose("invalid BPF_LD mode\n");
+                               return -EINVAL;
+@@ -3523,6 +3525,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
+                               u32 off, u32 cnt)
+ {
+       struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
++      int i;
+ 
+       if (cnt == 1)
+               return 0;
+@@ -3532,6 +3535,8 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
+       memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
+       memcpy(new_data + off + cnt - 1, old_data + off,
+              sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
++      for (i = off; i < off + cnt - 1; i++)
++              new_data[i].seen = true;
+       env->insn_aux_data = new_data;
+       vfree(old_data);
+       return 0;
+@@ -3550,6 +3555,25 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
+       return new_prog;
+ }
+ 
++/* The verifier does more data flow analysis than llvm and will not explore
++ * branches that are dead at run time. Malicious programs can have dead code
++ * too. Therefore replace all dead at-run-time code with nops.
++ */
++static void sanitize_dead_code(struct bpf_verifier_env *env)
++{
++      struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
++      struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0);
++      struct bpf_insn *insn = env->prog->insnsi;
++      const int insn_cnt = env->prog->len;
++      int i;
++
++      for (i = 0; i < insn_cnt; i++) {
++              if (aux_data[i].seen)
++                      continue;
++              memcpy(insn + i, &nop, sizeof(nop));
++      }
++}
++
+ /* convert load instructions that access fields of 'struct __sk_buff'
+  * into sequence of instructions that access fields of 'struct sk_buff'
+  */
+@@ -3841,6 +3865,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
+       while (pop_stack(env, NULL) >= 0);
+       free_states(env);
+ 
++      if (ret == 0)
++              sanitize_dead_code(env);
++
+       if (ret == 0)
+               /* program is valid, convert *(u32*)(ctx + off) accesses */
+               ret = convert_ctx_accesses(env);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0233-UBUNTU-SAUCE-BODGE-temporarily-disable-some-kprobe-t.patch b/patches/kernel/0233-UBUNTU-SAUCE-BODGE-temporarily-disable-some-kprobe-t.patch

deleted file mode 100644 (file)

index 2467366..0000000
--- a/patches/kernel/0233-UBUNTU-SAUCE-BODGE-temporarily-disable-some-kprobe-t.patch
+++ /dev/null
@@ -1,273 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Colin Ian King <colin.king@canonical.com>
-Date: Sat, 6 Jan 2018 10:26:31 +0000
-Subject: [PATCH] UBUNTU: SAUCE: BODGE: temporarily disable some kprobe trace
- points which are cratering
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Most of the interrupt related trace points are cratering when enabled.
-Simply turn them off temporarily while we are investigating this.
-
-CVE-2017-5754
-Based on work by Colin King <colin.king@canonical.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 4ecc04d14ee2f9b46d3e252215a7622d7d47e974)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/trace/irq_vectors.h | 2 +-
- arch/x86/kernel/apic/apic.c              | 7 -------
- arch/x86/kernel/cpu/mcheck/mce_amd.c     | 3 ---
- arch/x86/kernel/cpu/mcheck/therm_throt.c | 3 ---
- arch/x86/kernel/cpu/mcheck/threshold.c   | 3 ---
- arch/x86/kernel/irq.c                    | 3 ---
- arch/x86/kernel/irq_work.c               | 3 ---
- arch/x86/kernel/smp.c                    | 7 -------
- arch/x86/mm/fault.c                      | 9 ++-------
- 9 files changed, 3 insertions(+), 37 deletions(-)
-
-diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
-index 7825b4426e7e..cf529e274a14 100644
---- a/arch/x86/include/asm/trace/irq_vectors.h
-+++ b/arch/x86/include/asm/trace/irq_vectors.h
-@@ -67,7 +67,7 @@ DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi);
-  * irq_work - called when entering/exiting a irq work interrupt
-  * vector handler
-  */
--DEFINE_IRQ_VECTOR_EVENT(irq_work);
-+// DEFINE_IRQ_VECTOR_EVENT(irq_work);
- 
- /*
-  * We must dis-allow sampling irq_work_exit() because perf event sampling
-diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
-index bb63c1350524..4a018da7eca1 100644
---- a/arch/x86/kernel/apic/apic.c
-+++ b/arch/x86/kernel/apic/apic.c
-@@ -35,7 +35,6 @@
- #include <linux/smp.h>
- #include <linux/mm.h>
- 
--#include <asm/trace/irq_vectors.h>
- #include <asm/irq_remapping.h>
- #include <asm/perf_event.h>
- #include <asm/x86_init.h>
-@@ -1074,9 +1073,7 @@ __visible void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
-        * interrupt lock, which is the WrongThing (tm) to do.
-        */
-       entering_ack_irq();
--      trace_local_timer_entry(LOCAL_TIMER_VECTOR);
-       local_apic_timer_interrupt();
--      trace_local_timer_exit(LOCAL_TIMER_VECTOR);
-       exiting_irq();
- 
-       set_irq_regs(old_regs);
-@@ -1967,9 +1964,7 @@ __visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs)
-       u8 vector = ~regs->orig_ax;
- 
-       entering_irq();
--      trace_spurious_apic_entry(vector);
-       __smp_spurious_interrupt(vector);
--      trace_spurious_apic_exit(vector);
-       exiting_irq();
- }
- 
-@@ -2023,9 +2018,7 @@ __visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
- __visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs)
- {
-       entering_irq();
--      trace_error_apic_entry(ERROR_APIC_VECTOR);
-       __smp_error_interrupt(regs);
--      trace_error_apic_exit(ERROR_APIC_VECTOR);
-       exiting_irq();
- }
- 
-diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
-index 5ce1a5689162..c983db8ccdb8 100644
---- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
-+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
-@@ -26,7 +26,6 @@
- #include <asm/apic.h>
- #include <asm/mce.h>
- #include <asm/msr.h>
--#include <asm/trace/irq_vectors.h>
- 
- #define NR_BLOCKS         5
- #define THRESHOLD_MAX     0xFFF
-@@ -787,9 +786,7 @@ asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void)
- asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
- {
-       entering_irq();
--      trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
-       __smp_deferred_error_interrupt();
--      trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
-       exiting_ack_irq();
- }
- 
-diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
-index f7370abd33c6..f366a622e186 100644
---- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
-+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
-@@ -28,7 +28,6 @@
- #include <asm/apic.h>
- #include <asm/mce.h>
- #include <asm/msr.h>
--#include <asm/trace/irq_vectors.h>
- 
- /* How long to wait between reporting thermal events */
- #define CHECK_INTERVAL                (300 * HZ)
-@@ -408,9 +407,7 @@ asmlinkage __visible void __irq_entry
- smp_trace_thermal_interrupt(struct pt_regs *regs)
- {
-       entering_irq();
--      trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
-       __smp_thermal_interrupt();
--      trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
-       exiting_ack_irq();
- }
- 
-diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
-index bb0e75eed10a..623f3e3515e0 100644
---- a/arch/x86/kernel/cpu/mcheck/threshold.c
-+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
-@@ -7,7 +7,6 @@
- #include <asm/irq_vectors.h>
- #include <asm/apic.h>
- #include <asm/mce.h>
--#include <asm/trace/irq_vectors.h>
- 
- static void default_threshold_interrupt(void)
- {
-@@ -33,8 +32,6 @@ asmlinkage __visible void __irq_entry smp_threshold_interrupt(void)
- asmlinkage __visible void __irq_entry smp_trace_threshold_interrupt(void)
- {
-       entering_irq();
--      trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
-       __smp_threshold_interrupt();
--      trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
-       exiting_ack_irq();
- }
-diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
-index a84142a910f3..792a49c3c6d9 100644
---- a/arch/x86/kernel/irq.c
-+++ b/arch/x86/kernel/irq.c
-@@ -19,7 +19,6 @@
- #include <asm/desc.h>
- 
- #define CREATE_TRACE_POINTS
--#include <asm/trace/irq_vectors.h>
- 
- DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
- EXPORT_PER_CPU_SYMBOL(irq_stat);
-@@ -327,9 +326,7 @@ __visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs)
-       struct pt_regs *old_regs = set_irq_regs(regs);
- 
-       entering_ack_irq();
--      trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
-       __smp_x86_platform_ipi();
--      trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
-       exiting_irq();
-       set_irq_regs(old_regs);
- }
-diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
-index 275487872be2..06f12444c1b4 100644
---- a/arch/x86/kernel/irq_work.c
-+++ b/arch/x86/kernel/irq_work.c
-@@ -8,7 +8,6 @@
- #include <linux/irq_work.h>
- #include <linux/hardirq.h>
- #include <asm/apic.h>
--#include <asm/trace/irq_vectors.h>
- #include <linux/interrupt.h>
- 
- static inline void __smp_irq_work_interrupt(void)
-@@ -27,9 +26,7 @@ __visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs)
- __visible void __irq_entry smp_trace_irq_work_interrupt(struct pt_regs *regs)
- {
-       ipi_entering_ack_irq();
--      trace_irq_work_entry(IRQ_WORK_VECTOR);
-       __smp_irq_work_interrupt();
--      trace_irq_work_exit(IRQ_WORK_VECTOR);
-       exiting_irq();
- }
- 
-diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
-index d798c0da451c..fbf36f1731ab 100644
---- a/arch/x86/kernel/smp.c
-+++ b/arch/x86/kernel/smp.c
-@@ -31,7 +31,6 @@
- #include <asm/apic.h>
- #include <asm/nmi.h>
- #include <asm/mce.h>
--#include <asm/trace/irq_vectors.h>
- #include <asm/kexec.h>
- #include <asm/virtext.h>
- 
-@@ -280,9 +279,7 @@ __visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs)
-        * to nest.
-        */
-       ipi_entering_ack_irq();
--      trace_reschedule_entry(RESCHEDULE_VECTOR);
-       __smp_reschedule_interrupt();
--      trace_reschedule_exit(RESCHEDULE_VECTOR);
-       exiting_irq();
-       /*
-        * KVM uses this interrupt to force a cpu out of guest mode
-@@ -306,9 +303,7 @@ __visible void __irq_entry
- smp_trace_call_function_interrupt(struct pt_regs *regs)
- {
-       ipi_entering_ack_irq();
--      trace_call_function_entry(CALL_FUNCTION_VECTOR);
-       __smp_call_function_interrupt();
--      trace_call_function_exit(CALL_FUNCTION_VECTOR);
-       exiting_irq();
- }
- 
-@@ -330,9 +325,7 @@ __visible void __irq_entry
- smp_trace_call_function_single_interrupt(struct pt_regs *regs)
- {
-       ipi_entering_ack_irq();
--      trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
-       __smp_call_function_single_interrupt();
--      trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
-       exiting_irq();
- }
- 
-diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
-index d3a57e7ad311..4f6478d14d1f 100644
---- a/arch/x86/mm/fault.c
-+++ b/arch/x86/mm/fault.c
-@@ -26,7 +26,6 @@
- #include <asm/mmu_context.h>          /* vma_pkey()                   */
- 
- #define CREATE_TRACE_POINTS
--#include <asm/trace/exceptions.h>
- 
- /*
-  * Returns 0 if mmiotrace is disabled, or if the fault is not
-@@ -1471,10 +1470,6 @@ static nokprobe_inline void
- trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
-                        unsigned long error_code)
- {
--      if (user_mode(regs))
--              trace_page_fault_user(address, regs, error_code);
--      else
--              trace_page_fault_kernel(address, regs, error_code);
- }
- 
- /*
-@@ -1491,8 +1486,8 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
-       enum ctx_state prev_state;
- 
-       prev_state = exception_enter();
--      if (trace_irqvectors_enabled())
--              trace_page_fault_entries(address, regs, error_code);
-+//    if (trace_irqvectors_enabled())
-+//            trace_page_fault_entries(address, regs, error_code);
- 
-       __do_page_fault(regs, error_code, address);
-       exception_exit(prev_state);
--- 
-2.14.2
-
diff --git a/patches/kernel/0233-UBUNTU-SAUCE-only-attempt-to-use-PCID-in-64-bit-buil.patch b/patches/kernel/0233-UBUNTU-SAUCE-only-attempt-to-use-PCID-in-64-bit-buil.patch

new file mode 100644 (file)

index 0000000..cbc69e6
--- /dev/null
+++ b/patches/kernel/0233-UBUNTU-SAUCE-only-attempt-to-use-PCID-in-64-bit-buil.patch
@@ -0,0 +1,36 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andy Whitcroft <apw@canonical.com>
+Date: Wed, 20 Dec 2017 13:33:50 +0000
+Subject: [PATCH] UBUNTU: SAUCE: only attempt to use PCID in 64 bit builds
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+[apw@canonical.com: need to review if this is still needed with the
+latest patches.]
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit babace1d1ac19075498675cd787500cfa24d2b55)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/smpboot.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index 6ad8391b9866..398e8324fea4 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -222,7 +222,7 @@ static void notrace start_secondary(void *unused)
+        * before cpu_init(), SMP booting is too fragile that we want to
+        * limit the things done here to the most necessary things.
+        */
+-      if (boot_cpu_has(X86_FEATURE_PCID))
++      if (IS_ENABLED(CONFIG_X86_64) && boot_cpu_has(X86_FEATURE_PCID))
+               __write_cr4(__read_cr4() | X86_CR4_PCIDE);
+       cpu_init();
+       x86_cpuinit.early_percpu_clock_init();
+-- 
+2.14.2
+
diff --git a/patches/kernel/0234-UBUNTU-SAUCE-BODGE-temporarily-disable-some-kprobe-t.patch b/patches/kernel/0234-UBUNTU-SAUCE-BODGE-temporarily-disable-some-kprobe-t.patch

new file mode 100644 (file)

index 0000000..2467366
--- /dev/null
+++ b/patches/kernel/0234-UBUNTU-SAUCE-BODGE-temporarily-disable-some-kprobe-t.patch
@@ -0,0 +1,273 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Colin Ian King <colin.king@canonical.com>
+Date: Sat, 6 Jan 2018 10:26:31 +0000
+Subject: [PATCH] UBUNTU: SAUCE: BODGE: temporarily disable some kprobe trace
+ points which are cratering
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Most of the interrupt related trace points are cratering when enabled.
+Simply turn them off temporarily while we are investigating this.
+
+CVE-2017-5754
+Based on work by Colin King <colin.king@canonical.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 4ecc04d14ee2f9b46d3e252215a7622d7d47e974)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/trace/irq_vectors.h | 2 +-
+ arch/x86/kernel/apic/apic.c              | 7 -------
+ arch/x86/kernel/cpu/mcheck/mce_amd.c     | 3 ---
+ arch/x86/kernel/cpu/mcheck/therm_throt.c | 3 ---
+ arch/x86/kernel/cpu/mcheck/threshold.c   | 3 ---
+ arch/x86/kernel/irq.c                    | 3 ---
+ arch/x86/kernel/irq_work.c               | 3 ---
+ arch/x86/kernel/smp.c                    | 7 -------
+ arch/x86/mm/fault.c                      | 9 ++-------
+ 9 files changed, 3 insertions(+), 37 deletions(-)
+
+diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
+index 7825b4426e7e..cf529e274a14 100644
+--- a/arch/x86/include/asm/trace/irq_vectors.h
++++ b/arch/x86/include/asm/trace/irq_vectors.h
+@@ -67,7 +67,7 @@ DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi);
+  * irq_work - called when entering/exiting a irq work interrupt
+  * vector handler
+  */
+-DEFINE_IRQ_VECTOR_EVENT(irq_work);
++// DEFINE_IRQ_VECTOR_EVENT(irq_work);
+ 
+ /*
+  * We must dis-allow sampling irq_work_exit() because perf event sampling
+diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
+index bb63c1350524..4a018da7eca1 100644
+--- a/arch/x86/kernel/apic/apic.c
++++ b/arch/x86/kernel/apic/apic.c
+@@ -35,7 +35,6 @@
+ #include <linux/smp.h>
+ #include <linux/mm.h>
+ 
+-#include <asm/trace/irq_vectors.h>
+ #include <asm/irq_remapping.h>
+ #include <asm/perf_event.h>
+ #include <asm/x86_init.h>
+@@ -1074,9 +1073,7 @@ __visible void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
+        * interrupt lock, which is the WrongThing (tm) to do.
+        */
+       entering_ack_irq();
+-      trace_local_timer_entry(LOCAL_TIMER_VECTOR);
+       local_apic_timer_interrupt();
+-      trace_local_timer_exit(LOCAL_TIMER_VECTOR);
+       exiting_irq();
+ 
+       set_irq_regs(old_regs);
+@@ -1967,9 +1964,7 @@ __visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs)
+       u8 vector = ~regs->orig_ax;
+ 
+       entering_irq();
+-      trace_spurious_apic_entry(vector);
+       __smp_spurious_interrupt(vector);
+-      trace_spurious_apic_exit(vector);
+       exiting_irq();
+ }
+ 
+@@ -2023,9 +2018,7 @@ __visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
+ __visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs)
+ {
+       entering_irq();
+-      trace_error_apic_entry(ERROR_APIC_VECTOR);
+       __smp_error_interrupt(regs);
+-      trace_error_apic_exit(ERROR_APIC_VECTOR);
+       exiting_irq();
+ }
+ 
+diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
+index 5ce1a5689162..c983db8ccdb8 100644
+--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
+@@ -26,7 +26,6 @@
+ #include <asm/apic.h>
+ #include <asm/mce.h>
+ #include <asm/msr.h>
+-#include <asm/trace/irq_vectors.h>
+ 
+ #define NR_BLOCKS         5
+ #define THRESHOLD_MAX     0xFFF
+@@ -787,9 +786,7 @@ asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void)
+ asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
+ {
+       entering_irq();
+-      trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
+       __smp_deferred_error_interrupt();
+-      trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
+       exiting_ack_irq();
+ }
+ 
+diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
+index f7370abd33c6..f366a622e186 100644
+--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
++++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
+@@ -28,7 +28,6 @@
+ #include <asm/apic.h>
+ #include <asm/mce.h>
+ #include <asm/msr.h>
+-#include <asm/trace/irq_vectors.h>
+ 
+ /* How long to wait between reporting thermal events */
+ #define CHECK_INTERVAL                (300 * HZ)
+@@ -408,9 +407,7 @@ asmlinkage __visible void __irq_entry
+ smp_trace_thermal_interrupt(struct pt_regs *regs)
+ {
+       entering_irq();
+-      trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
+       __smp_thermal_interrupt();
+-      trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
+       exiting_ack_irq();
+ }
+ 
+diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
+index bb0e75eed10a..623f3e3515e0 100644
+--- a/arch/x86/kernel/cpu/mcheck/threshold.c
++++ b/arch/x86/kernel/cpu/mcheck/threshold.c
+@@ -7,7 +7,6 @@
+ #include <asm/irq_vectors.h>
+ #include <asm/apic.h>
+ #include <asm/mce.h>
+-#include <asm/trace/irq_vectors.h>
+ 
+ static void default_threshold_interrupt(void)
+ {
+@@ -33,8 +32,6 @@ asmlinkage __visible void __irq_entry smp_threshold_interrupt(void)
+ asmlinkage __visible void __irq_entry smp_trace_threshold_interrupt(void)
+ {
+       entering_irq();
+-      trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
+       __smp_threshold_interrupt();
+-      trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
+       exiting_ack_irq();
+ }
+diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
+index a84142a910f3..792a49c3c6d9 100644
+--- a/arch/x86/kernel/irq.c
++++ b/arch/x86/kernel/irq.c
+@@ -19,7 +19,6 @@
+ #include <asm/desc.h>
+ 
+ #define CREATE_TRACE_POINTS
+-#include <asm/trace/irq_vectors.h>
+ 
+ DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+ EXPORT_PER_CPU_SYMBOL(irq_stat);
+@@ -327,9 +326,7 @@ __visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs)
+       struct pt_regs *old_regs = set_irq_regs(regs);
+ 
+       entering_ack_irq();
+-      trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
+       __smp_x86_platform_ipi();
+-      trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
+       exiting_irq();
+       set_irq_regs(old_regs);
+ }
+diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
+index 275487872be2..06f12444c1b4 100644
+--- a/arch/x86/kernel/irq_work.c
++++ b/arch/x86/kernel/irq_work.c
+@@ -8,7 +8,6 @@
+ #include <linux/irq_work.h>
+ #include <linux/hardirq.h>
+ #include <asm/apic.h>
+-#include <asm/trace/irq_vectors.h>
+ #include <linux/interrupt.h>
+ 
+ static inline void __smp_irq_work_interrupt(void)
+@@ -27,9 +26,7 @@ __visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs)
+ __visible void __irq_entry smp_trace_irq_work_interrupt(struct pt_regs *regs)
+ {
+       ipi_entering_ack_irq();
+-      trace_irq_work_entry(IRQ_WORK_VECTOR);
+       __smp_irq_work_interrupt();
+-      trace_irq_work_exit(IRQ_WORK_VECTOR);
+       exiting_irq();
+ }
+ 
+diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
+index d798c0da451c..fbf36f1731ab 100644
+--- a/arch/x86/kernel/smp.c
++++ b/arch/x86/kernel/smp.c
+@@ -31,7 +31,6 @@
+ #include <asm/apic.h>
+ #include <asm/nmi.h>
+ #include <asm/mce.h>
+-#include <asm/trace/irq_vectors.h>
+ #include <asm/kexec.h>
+ #include <asm/virtext.h>
+ 
+@@ -280,9 +279,7 @@ __visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs)
+        * to nest.
+        */
+       ipi_entering_ack_irq();
+-      trace_reschedule_entry(RESCHEDULE_VECTOR);
+       __smp_reschedule_interrupt();
+-      trace_reschedule_exit(RESCHEDULE_VECTOR);
+       exiting_irq();
+       /*
+        * KVM uses this interrupt to force a cpu out of guest mode
+@@ -306,9 +303,7 @@ __visible void __irq_entry
+ smp_trace_call_function_interrupt(struct pt_regs *regs)
+ {
+       ipi_entering_ack_irq();
+-      trace_call_function_entry(CALL_FUNCTION_VECTOR);
+       __smp_call_function_interrupt();
+-      trace_call_function_exit(CALL_FUNCTION_VECTOR);
+       exiting_irq();
+ }
+ 
+@@ -330,9 +325,7 @@ __visible void __irq_entry
+ smp_trace_call_function_single_interrupt(struct pt_regs *regs)
+ {
+       ipi_entering_ack_irq();
+-      trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
+       __smp_call_function_single_interrupt();
+-      trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
+       exiting_irq();
+ }
+ 
+diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
+index d3a57e7ad311..4f6478d14d1f 100644
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -26,7 +26,6 @@
+ #include <asm/mmu_context.h>          /* vma_pkey()                   */
+ 
+ #define CREATE_TRACE_POINTS
+-#include <asm/trace/exceptions.h>
+ 
+ /*
+  * Returns 0 if mmiotrace is disabled, or if the fault is not
+@@ -1471,10 +1470,6 @@ static nokprobe_inline void
+ trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
+                        unsigned long error_code)
+ {
+-      if (user_mode(regs))
+-              trace_page_fault_user(address, regs, error_code);
+-      else
+-              trace_page_fault_kernel(address, regs, error_code);
+ }
+ 
+ /*
+@@ -1491,8 +1486,8 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
+       enum ctx_state prev_state;
+ 
+       prev_state = exception_enter();
+-      if (trace_irqvectors_enabled())
+-              trace_page_fault_entries(address, regs, error_code);
++//    if (trace_irqvectors_enabled())
++//            trace_page_fault_entries(address, regs, error_code);
+ 
+       __do_page_fault(regs, error_code, address);
+       exception_exit(prev_state);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0234-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch b/patches/kernel/0234-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch

deleted file mode 100644 (file)

index 4e1f81a..0000000
--- a/patches/kernel/0234-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch
+++ /dev/null
@@ -1,98 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Jim Mattson <jmattson@google.com>
-Date: Wed, 3 Jan 2018 14:31:38 -0800
-Subject: [PATCH] kvm: vmx: Scrub hardware GPRs at VM-exit
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Guest GPR values are live in the hardware GPRs at VM-exit.  Do not
-leave any guest values in hardware GPRs after the guest GPR values are
-saved to the vcpu_vmx structure.
-
-This is a partial mitigation for CVE 2017-5715 and CVE 2017-5753.
-Specifically, it defeats the Project Zero PoC for CVE 2017-5715.
-
-Suggested-by: Eric Northup <digitaleric@google.com>
-Signed-off-by: Jim Mattson <jmattson@google.com>
-Reviewed-by: Eric Northup <digitaleric@google.com>
-Reviewed-by: Benjamin Serebrin <serebrin@google.com>
-Reviewed-by: Andrew Honig <ahonig@google.com>
-[Paolo: Add AMD bits, Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>]
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kvm/svm.c | 19 +++++++++++++++++++
- arch/x86/kvm/vmx.c | 14 +++++++++++++-
- 2 files changed, 32 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
-index af09baa3d736..92cd94d51e1f 100644
---- a/arch/x86/kvm/svm.c
-+++ b/arch/x86/kvm/svm.c
-@@ -4924,6 +4924,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
-               "mov %%r13, %c[r13](%[svm]) \n\t"
-               "mov %%r14, %c[r14](%[svm]) \n\t"
-               "mov %%r15, %c[r15](%[svm]) \n\t"
-+#endif
-+              /*
-+              * Clear host registers marked as clobbered to prevent
-+              * speculative use.
-+              */
-+              "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
-+              "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
-+              "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
-+              "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
-+              "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
-+#ifdef CONFIG_X86_64
-+              "xor %%r8, %%r8 \n\t"
-+              "xor %%r9, %%r9 \n\t"
-+              "xor %%r10, %%r10 \n\t"
-+              "xor %%r11, %%r11 \n\t"
-+              "xor %%r12, %%r12 \n\t"
-+              "xor %%r13, %%r13 \n\t"
-+              "xor %%r14, %%r14 \n\t"
-+              "xor %%r15, %%r15 \n\t"
- #endif
-               "pop %%" _ASM_BP
-               :
-diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
-index d61986a36575..9b4256fd589a 100644
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -9140,6 +9140,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
-               /* Save guest registers, load host registers, keep flags */
-               "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
-               "pop %0 \n\t"
-+              "setbe %c[fail](%0)\n\t"
-               "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
-               "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
-               __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
-@@ -9156,12 +9157,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
-               "mov %%r13, %c[r13](%0) \n\t"
-               "mov %%r14, %c[r14](%0) \n\t"
-               "mov %%r15, %c[r15](%0) \n\t"
-+              "xor %%r8d,  %%r8d \n\t"
-+              "xor %%r9d,  %%r9d \n\t"
-+              "xor %%r10d, %%r10d \n\t"
-+              "xor %%r11d, %%r11d \n\t"
-+              "xor %%r12d, %%r12d \n\t"
-+              "xor %%r13d, %%r13d \n\t"
-+              "xor %%r14d, %%r14d \n\t"
-+              "xor %%r15d, %%r15d \n\t"
- #endif
-               "mov %%cr2, %%" _ASM_AX "   \n\t"
-               "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
- 
-+              "xor %%eax, %%eax \n\t"
-+              "xor %%ebx, %%ebx \n\t"
-+              "xor %%esi, %%esi \n\t"
-+              "xor %%edi, %%edi \n\t"
-               "pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
--              "setbe %c[fail](%0) \n\t"
-               ".pushsection .rodata \n\t"
-               ".global vmx_return \n\t"
-               "vmx_return: " _ASM_PTR " 2b \n\t"
--- 
-2.14.2
-
diff --git a/patches/kernel/0235-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch b/patches/kernel/0235-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch

new file mode 100644 (file)

index 0000000..4e1f81a
--- /dev/null
+++ b/patches/kernel/0235-kvm-vmx-Scrub-hardware-GPRs-at-VM-exit.patch
@@ -0,0 +1,98 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Jim Mattson <jmattson@google.com>
+Date: Wed, 3 Jan 2018 14:31:38 -0800
+Subject: [PATCH] kvm: vmx: Scrub hardware GPRs at VM-exit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Guest GPR values are live in the hardware GPRs at VM-exit.  Do not
+leave any guest values in hardware GPRs after the guest GPR values are
+saved to the vcpu_vmx structure.
+
+This is a partial mitigation for CVE 2017-5715 and CVE 2017-5753.
+Specifically, it defeats the Project Zero PoC for CVE 2017-5715.
+
+Suggested-by: Eric Northup <digitaleric@google.com>
+Signed-off-by: Jim Mattson <jmattson@google.com>
+Reviewed-by: Eric Northup <digitaleric@google.com>
+Reviewed-by: Benjamin Serebrin <serebrin@google.com>
+Reviewed-by: Andrew Honig <ahonig@google.com>
+[Paolo: Add AMD bits, Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/svm.c | 19 +++++++++++++++++++
+ arch/x86/kvm/vmx.c | 14 +++++++++++++-
+ 2 files changed, 32 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index af09baa3d736..92cd94d51e1f 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -4924,6 +4924,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+               "mov %%r13, %c[r13](%[svm]) \n\t"
+               "mov %%r14, %c[r14](%[svm]) \n\t"
+               "mov %%r15, %c[r15](%[svm]) \n\t"
++#endif
++              /*
++              * Clear host registers marked as clobbered to prevent
++              * speculative use.
++              */
++              "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
++              "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
++              "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
++              "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
++              "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
++#ifdef CONFIG_X86_64
++              "xor %%r8, %%r8 \n\t"
++              "xor %%r9, %%r9 \n\t"
++              "xor %%r10, %%r10 \n\t"
++              "xor %%r11, %%r11 \n\t"
++              "xor %%r12, %%r12 \n\t"
++              "xor %%r13, %%r13 \n\t"
++              "xor %%r14, %%r14 \n\t"
++              "xor %%r15, %%r15 \n\t"
+ #endif
+               "pop %%" _ASM_BP
+               :
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index d61986a36575..9b4256fd589a 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -9140,6 +9140,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+               /* Save guest registers, load host registers, keep flags */
+               "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
+               "pop %0 \n\t"
++              "setbe %c[fail](%0)\n\t"
+               "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
+               "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
+               __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
+@@ -9156,12 +9157,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+               "mov %%r13, %c[r13](%0) \n\t"
+               "mov %%r14, %c[r14](%0) \n\t"
+               "mov %%r15, %c[r15](%0) \n\t"
++              "xor %%r8d,  %%r8d \n\t"
++              "xor %%r9d,  %%r9d \n\t"
++              "xor %%r10d, %%r10d \n\t"
++              "xor %%r11d, %%r11d \n\t"
++              "xor %%r12d, %%r12d \n\t"
++              "xor %%r13d, %%r13d \n\t"
++              "xor %%r14d, %%r14d \n\t"
++              "xor %%r15d, %%r15d \n\t"
+ #endif
+               "mov %%cr2, %%" _ASM_AX "   \n\t"
+               "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
+ 
++              "xor %%eax, %%eax \n\t"
++              "xor %%ebx, %%ebx \n\t"
++              "xor %%esi, %%esi \n\t"
++              "xor %%edi, %%edi \n\t"
+               "pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
+-              "setbe %c[fail](%0) \n\t"
+               ".pushsection .rodata \n\t"
+               ".global vmx_return \n\t"
+               "vmx_return: " _ASM_PTR " 2b \n\t"
+-- 
+2.14.2
+
diff --git a/patches/kernel/0235-objtool-use-sh-to-invoke-sync-check.sh-in-the-Makefi.patch b/patches/kernel/0235-objtool-use-sh-to-invoke-sync-check.sh-in-the-Makefi.patch

deleted file mode 100644 (file)

index f343025..0000000
--- a/patches/kernel/0235-objtool-use-sh-to-invoke-sync-check.sh-in-the-Makefi.patch
+++ /dev/null
@@ -1,35 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
-Date: Sat, 6 Jan 2018 17:50:34 -0200
-Subject: [PATCH] objtool: use sh to invoke sync-check.sh in the Makefile
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This avoids build failures when building debian packages.
-
-Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
-(cherry picked from commit 6abf30ed000f0da24985295c206cc6f08a311301)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- tools/objtool/Makefile | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
-index 5c71bae01064..fe022f68638d 100644
---- a/tools/objtool/Makefile
-+++ b/tools/objtool/Makefile
-@@ -44,7 +44,7 @@ $(OBJTOOL_IN): fixdep FORCE
-       @$(MAKE) $(build)=objtool
- 
- $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
--      @./sync-check.sh
-+      @sh ./sync-check.sh
-       $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
- 
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0236-objtool-use-sh-to-invoke-sync-check.sh-in-the-Makefi.patch b/patches/kernel/0236-objtool-use-sh-to-invoke-sync-check.sh-in-the-Makefi.patch

new file mode 100644 (file)

index 0000000..f343025
--- /dev/null
+++ b/patches/kernel/0236-objtool-use-sh-to-invoke-sync-check.sh-in-the-Makefi.patch
@@ -0,0 +1,35 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
+Date: Sat, 6 Jan 2018 17:50:34 -0200
+Subject: [PATCH] objtool: use sh to invoke sync-check.sh in the Makefile
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This avoids build failures when building debian packages.
+
+Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
+(cherry picked from commit 6abf30ed000f0da24985295c206cc6f08a311301)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ tools/objtool/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
+index 5c71bae01064..fe022f68638d 100644
+--- a/tools/objtool/Makefile
++++ b/tools/objtool/Makefile
+@@ -44,7 +44,7 @@ $(OBJTOOL_IN): fixdep FORCE
+       @$(MAKE) $(build)=objtool
+ 
+ $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
+-      @./sync-check.sh
++      @sh ./sync-check.sh
+       $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
+ 
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0236-x86-tlb-Drop-the-_GPL-from-the-cpu_tlbstate-export.patch b/patches/kernel/0236-x86-tlb-Drop-the-_GPL-from-the-cpu_tlbstate-export.patch

deleted file mode 100644 (file)

index d72b125..0000000
--- a/patches/kernel/0236-x86-tlb-Drop-the-_GPL-from-the-cpu_tlbstate-export.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Thu, 4 Jan 2018 22:19:04 +0100
-Subject: [PATCH] x86/tlb: Drop the _GPL from the cpu_tlbstate export
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-commit 1e5476815fd7f98b888e01a0f9522b63085f96c9 upstream.
-
-The recent changes for PTI touch cpu_tlbstate from various tlb_flush
-inlines. cpu_tlbstate is exported as GPL symbol, so this causes a
-regression when building out of tree drivers for certain graphics cards.
-
-Aside of that the export was wrong since it was introduced as it should
-have been EXPORT_PER_CPU_SYMBOL_GPL().
-
-Use the correct PER_CPU export and drop the _GPL to restore the previous
-state which allows users to utilize the cards they payed for.
-
-As always I'm really thrilled to make this kind of change to support the
-#friends (or however the hot hashtag of today is spelled) from that closet
-sauce graphics corp.
-
-Fixes: 1e02ce4cccdc ("x86: Store a per-cpu shadow copy of CR4")
-Fixes: 6fd166aae78c ("x86/mm: Use/Fix PCID to optimize user/kernel switches")
-Reported-by: Kees Cook <keescook@google.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/init.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
-index 80259ad8c386..6b462a472a7b 100644
---- a/arch/x86/mm/init.c
-+++ b/arch/x86/mm/init.c
-@@ -870,7 +870,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
-       .next_asid = 1,
-       .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
- };
--EXPORT_SYMBOL_GPL(cpu_tlbstate);
-+EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);
- 
- void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
- {
--- 
-2.14.2
-
diff --git a/patches/kernel/0237-x86-events-intel-ds-Use-the-proper-cache-flush-metho.patch b/patches/kernel/0237-x86-events-intel-ds-Use-the-proper-cache-flush-metho.patch

deleted file mode 100644 (file)

index b3276d5..0000000
--- a/patches/kernel/0237-x86-events-intel-ds-Use-the-proper-cache-flush-metho.patch
+++ /dev/null
@@ -1,105 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Thu, 4 Jan 2018 18:07:12 +0100
-Subject: [PATCH] x86/events/intel/ds: Use the proper cache flush method for
- mapping ds buffers
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-commit 42f3bdc5dd962a5958bc024c1e1444248a6b8b4a upstream.
-
-Thomas reported the following warning:
-
- BUG: using smp_processor_id() in preemptible [00000000] code: ovsdb-server/4498
- caller is native_flush_tlb_single+0x57/0xc0
- native_flush_tlb_single+0x57/0xc0
- __set_pte_vaddr+0x2d/0x40
- set_pte_vaddr+0x2f/0x40
- cea_set_pte+0x30/0x40
- ds_update_cea.constprop.4+0x4d/0x70
- reserve_ds_buffers+0x159/0x410
- x86_reserve_hardware+0x150/0x160
- x86_pmu_event_init+0x3e/0x1f0
- perf_try_init_event+0x69/0x80
- perf_event_alloc+0x652/0x740
- SyS_perf_event_open+0x3f6/0xd60
- do_syscall_64+0x5c/0x190
-
-set_pte_vaddr is used to map the ds buffers into the cpu entry area, but
-there are two problems with that:
-
- 1) The resulting flush is not supposed to be called in preemptible context
-
- 2) The cpu entry area is supposed to be per CPU, but the debug store
-    buffers are mapped for all CPUs so these mappings need to be flushed
-    globally.
-
-Add the necessary preemption protection across the mapping code and flush
-TLBs globally.
-
-Fixes: c1961a4631da ("x86/events/intel/ds: Map debug buffers in cpu_entry_area")
-Reported-by: Thomas Zeitlhofer <thomas.zeitlhofer+lkml@ze-it.at>
-Signed-off-by: Peter Zijlstra <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Tested-by: Thomas Zeitlhofer <thomas.zeitlhofer+lkml@ze-it.at>
-Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Cc: Hugh Dickins <hughd@google.com>
-Link: https://lkml.kernel.org/r/20180104170712.GB3040@hirez.programming.kicks-ass.net
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/events/intel/ds.c | 16 ++++++++++++++++
- 1 file changed, 16 insertions(+)
-
-diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
-index 85df1f12c49e..1d236666ee0e 100644
---- a/arch/x86/events/intel/ds.c
-+++ b/arch/x86/events/intel/ds.c
-@@ -4,6 +4,7 @@
- 
- #include <asm/cpu_entry_area.h>
- #include <asm/perf_event.h>
-+#include <asm/tlbflush.h>
- #include <asm/insn.h>
- 
- #include "../perf_event.h"
-@@ -282,20 +283,35 @@ static DEFINE_PER_CPU(void *, insn_buffer);
- 
- static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
- {
-+      unsigned long start = (unsigned long)cea;
-       phys_addr_t pa;
-       size_t msz = 0;
- 
-       pa = virt_to_phys(addr);
-+
-+      preempt_disable();
-       for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
-               cea_set_pte(cea, pa, prot);
-+
-+      /*
-+       * This is a cross-CPU update of the cpu_entry_area, we must shoot down
-+       * all TLB entries for it.
-+       */
-+      flush_tlb_kernel_range(start, start + size);
-+      preempt_enable();
- }
- 
- static void ds_clear_cea(void *cea, size_t size)
- {
-+      unsigned long start = (unsigned long)cea;
-       size_t msz = 0;
- 
-+      preempt_disable();
-       for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
-               cea_set_pte(cea, 0, PAGE_NONE);
-+
-+      flush_tlb_kernel_range(start, start + size);
-+      preempt_enable();
- }
- 
- static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
--- 
-2.14.2
-
diff --git a/patches/kernel/0237-x86-tlb-Drop-the-_GPL-from-the-cpu_tlbstate-export.patch b/patches/kernel/0237-x86-tlb-Drop-the-_GPL-from-the-cpu_tlbstate-export.patch

new file mode 100644 (file)

index 0000000..d72b125
--- /dev/null
+++ b/patches/kernel/0237-x86-tlb-Drop-the-_GPL-from-the-cpu_tlbstate-export.patch
@@ -0,0 +1,53 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 4 Jan 2018 22:19:04 +0100
+Subject: [PATCH] x86/tlb: Drop the _GPL from the cpu_tlbstate export
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 1e5476815fd7f98b888e01a0f9522b63085f96c9 upstream.
+
+The recent changes for PTI touch cpu_tlbstate from various tlb_flush
+inlines. cpu_tlbstate is exported as GPL symbol, so this causes a
+regression when building out of tree drivers for certain graphics cards.
+
+Aside of that the export was wrong since it was introduced as it should
+have been EXPORT_PER_CPU_SYMBOL_GPL().
+
+Use the correct PER_CPU export and drop the _GPL to restore the previous
+state which allows users to utilize the cards they payed for.
+
+As always I'm really thrilled to make this kind of change to support the
+#friends (or however the hot hashtag of today is spelled) from that closet
+sauce graphics corp.
+
+Fixes: 1e02ce4cccdc ("x86: Store a per-cpu shadow copy of CR4")
+Fixes: 6fd166aae78c ("x86/mm: Use/Fix PCID to optimize user/kernel switches")
+Reported-by: Kees Cook <keescook@google.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/init.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index 80259ad8c386..6b462a472a7b 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -870,7 +870,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+       .next_asid = 1,
+       .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
+ };
+-EXPORT_SYMBOL_GPL(cpu_tlbstate);
++EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);
+ 
+ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
+ {
+-- 
+2.14.2
+
diff --git a/patches/kernel/0238-x86-events-intel-ds-Use-the-proper-cache-flush-metho.patch b/patches/kernel/0238-x86-events-intel-ds-Use-the-proper-cache-flush-metho.patch

new file mode 100644 (file)

index 0000000..b3276d5
--- /dev/null
+++ b/patches/kernel/0238-x86-events-intel-ds-Use-the-proper-cache-flush-metho.patch
@@ -0,0 +1,105 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 4 Jan 2018 18:07:12 +0100
+Subject: [PATCH] x86/events/intel/ds: Use the proper cache flush method for
+ mapping ds buffers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 42f3bdc5dd962a5958bc024c1e1444248a6b8b4a upstream.
+
+Thomas reported the following warning:
+
+ BUG: using smp_processor_id() in preemptible [00000000] code: ovsdb-server/4498
+ caller is native_flush_tlb_single+0x57/0xc0
+ native_flush_tlb_single+0x57/0xc0
+ __set_pte_vaddr+0x2d/0x40
+ set_pte_vaddr+0x2f/0x40
+ cea_set_pte+0x30/0x40
+ ds_update_cea.constprop.4+0x4d/0x70
+ reserve_ds_buffers+0x159/0x410
+ x86_reserve_hardware+0x150/0x160
+ x86_pmu_event_init+0x3e/0x1f0
+ perf_try_init_event+0x69/0x80
+ perf_event_alloc+0x652/0x740
+ SyS_perf_event_open+0x3f6/0xd60
+ do_syscall_64+0x5c/0x190
+
+set_pte_vaddr is used to map the ds buffers into the cpu entry area, but
+there are two problems with that:
+
+ 1) The resulting flush is not supposed to be called in preemptible context
+
+ 2) The cpu entry area is supposed to be per CPU, but the debug store
+    buffers are mapped for all CPUs so these mappings need to be flushed
+    globally.
+
+Add the necessary preemption protection across the mapping code and flush
+TLBs globally.
+
+Fixes: c1961a4631da ("x86/events/intel/ds: Map debug buffers in cpu_entry_area")
+Reported-by: Thomas Zeitlhofer <thomas.zeitlhofer+lkml@ze-it.at>
+Signed-off-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Thomas Zeitlhofer <thomas.zeitlhofer+lkml@ze-it.at>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Hugh Dickins <hughd@google.com>
+Link: https://lkml.kernel.org/r/20180104170712.GB3040@hirez.programming.kicks-ass.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/events/intel/ds.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
+index 85df1f12c49e..1d236666ee0e 100644
+--- a/arch/x86/events/intel/ds.c
++++ b/arch/x86/events/intel/ds.c
+@@ -4,6 +4,7 @@
+ 
+ #include <asm/cpu_entry_area.h>
+ #include <asm/perf_event.h>
++#include <asm/tlbflush.h>
+ #include <asm/insn.h>
+ 
+ #include "../perf_event.h"
+@@ -282,20 +283,35 @@ static DEFINE_PER_CPU(void *, insn_buffer);
+ 
+ static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
+ {
++      unsigned long start = (unsigned long)cea;
+       phys_addr_t pa;
+       size_t msz = 0;
+ 
+       pa = virt_to_phys(addr);
++
++      preempt_disable();
+       for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
+               cea_set_pte(cea, pa, prot);
++
++      /*
++       * This is a cross-CPU update of the cpu_entry_area, we must shoot down
++       * all TLB entries for it.
++       */
++      flush_tlb_kernel_range(start, start + size);
++      preempt_enable();
+ }
+ 
+ static void ds_clear_cea(void *cea, size_t size)
+ {
++      unsigned long start = (unsigned long)cea;
+       size_t msz = 0;
+ 
++      preempt_disable();
+       for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
+               cea_set_pte(cea, 0, PAGE_NONE);
++
++      flush_tlb_kernel_range(start, start + size);
++      preempt_enable();
+ }
+ 
+ static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0238-x86-mm-Set-MODULES_END-to-0xffffffffff000000.patch b/patches/kernel/0238-x86-mm-Set-MODULES_END-to-0xffffffffff000000.patch

deleted file mode 100644 (file)

index 4b07728..0000000
--- a/patches/kernel/0238-x86-mm-Set-MODULES_END-to-0xffffffffff000000.patch
+++ /dev/null
@@ -1,104 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andrey Ryabinin <aryabinin@virtuozzo.com>
-Date: Thu, 28 Dec 2017 19:06:20 +0300
-Subject: [PATCH] x86/mm: Set MODULES_END to 0xffffffffff000000
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-commit f5a40711fa58f1c109165a4fec6078bf2dfd2bdc upstream.
-
-Since f06bdd4001c2 ("x86/mm: Adapt MODULES_END based on fixmap section size")
-kasan_mem_to_shadow(MODULES_END) could be not aligned to a page boundary.
-
-So passing page unaligned address to kasan_populate_zero_shadow() have two
-possible effects:
-
-1) It may leave one page hole in supposed to be populated area. After commit
-  21506525fb8d ("x86/kasan/64: Teach KASAN about the cpu_entry_area") that
-  hole happens to be in the shadow covering fixmap area and leads to crash:
-
- BUG: unable to handle kernel paging request at fffffbffffe8ee04
- RIP: 0010:check_memory_region+0x5c/0x190
-
- Call Trace:
-  <NMI>
-  memcpy+0x1f/0x50
-  ghes_copy_tofrom_phys+0xab/0x180
-  ghes_read_estatus+0xfb/0x280
-  ghes_notify_nmi+0x2b2/0x410
-  nmi_handle+0x115/0x2c0
-  default_do_nmi+0x57/0x110
-  do_nmi+0xf8/0x150
-  end_repeat_nmi+0x1a/0x1e
-
-Note, the crash likely disappeared after commit 92a0f81d8957, which
-changed kasan_populate_zero_shadow() call the way it was before
-commit 21506525fb8d.
-
-2) Attempt to load module near MODULES_END will fail, because
-   __vmalloc_node_range() called from kasan_module_alloc() will hit the
-   WARN_ON(!pte_none(*pte)) in the vmap_pte_range() and bail out with error.
-
-To fix this we need to make kasan_mem_to_shadow(MODULES_END) page aligned
-which means that MODULES_END should be 8*PAGE_SIZE aligned.
-
-The whole point of commit f06bdd4001c2 was to move MODULES_END down if
-NR_CPUS is big, so the cpu_entry_area takes a lot of space.
-But since 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap")
-the cpu_entry_area is no longer in fixmap, so we could just set
-MODULES_END to a fixed 8*PAGE_SIZE aligned address.
-
-Fixes: f06bdd4001c2 ("x86/mm: Adapt MODULES_END based on fixmap section size")
-Reported-by: Jakub Kicinski <kubakici@wp.pl>
-Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Thomas Garnier <thgarnie@google.com>
-Link: https://lkml.kernel.org/r/20171228160620.23818-1-aryabinin@virtuozzo.com
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/x86/x86_64/mm.txt         | 5 +----
- arch/x86/include/asm/pgtable_64_types.h | 2 +-
- 2 files changed, 2 insertions(+), 5 deletions(-)
-
-diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
-index ad41b3813f0a..ddd5ffd31bd0 100644
---- a/Documentation/x86/x86_64/mm.txt
-+++ b/Documentation/x86/x86_64/mm.txt
-@@ -43,7 +43,7 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
- ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
- ... unused hole ...
- ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
--ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space
-+ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
- [fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
- ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
- ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
-@@ -67,9 +67,6 @@ memory window (this size is arbitrary, it can be raised later if needed).
- The mappings are not part of any other kernel PGD and are only available
- during EFI runtime calls.
- 
--The module mapping space size changes based on the CONFIG requirements for the
--following fixmap section.
--
- Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
- physical memory, vmalloc/ioremap space and virtual memory map are randomized.
- Their order is preserved but their base will be offset early at boot time.
-diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
-index e8a809ee0bb6..c92bd73b1e46 100644
---- a/arch/x86/include/asm/pgtable_64_types.h
-+++ b/arch/x86/include/asm/pgtable_64_types.h
-@@ -103,7 +103,7 @@ typedef struct { pteval_t pte; } pte_t;
- 
- #define MODULES_VADDR         (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
- /* The module sections ends with the start of the fixmap */
--#define MODULES_END           __fix_to_virt(__end_of_fixed_addresses + 1)
-+#define MODULES_END           _AC(0xffffffffff000000, UL)
- #define MODULES_LEN           (MODULES_END - MODULES_VADDR)
- 
- #define ESPFIX_PGD_ENTRY      _AC(-2, UL)
--- 
-2.14.2
-
diff --git a/patches/kernel/0239-x86-mm-Map-cpu_entry_area-at-the-same-place-on-4-5-l.patch b/patches/kernel/0239-x86-mm-Map-cpu_entry_area-at-the-same-place-on-4-5-l.patch

deleted file mode 100644 (file)

index 8f7ff69..0000000
--- a/patches/kernel/0239-x86-mm-Map-cpu_entry_area-at-the-same-place-on-4-5-l.patch
+++ /dev/null
@@ -1,98 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Thu, 4 Jan 2018 13:01:40 +0100
-Subject: [PATCH] x86/mm: Map cpu_entry_area at the same place on 4/5 level
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-commit f2078904810373211fb15f91888fba14c01a4acc upstream.
-
-There is no reason for 4 and 5 level pagetables to have a different
-layout. It just makes determining vaddr_end for KASLR harder than
-necessary.
-
-Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap")
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Benjamin Gilbert <benjamin.gilbert@coreos.com>
-Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Garnier <thgarnie@google.com>,
-Cc: Alexander Kuleshov <kuleshovmail@gmail.com>
-Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801041320360.1771@nanos
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/x86/x86_64/mm.txt         | 7 ++++---
- arch/x86/include/asm/pgtable_64_types.h | 4 ++--
- arch/x86/mm/dump_pagetables.c           | 2 +-
- 3 files changed, 7 insertions(+), 6 deletions(-)
-
-diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
-index ddd5ffd31bd0..f7dabe1f01e9 100644
---- a/Documentation/x86/x86_64/mm.txt
-+++ b/Documentation/x86/x86_64/mm.txt
-@@ -12,8 +12,8 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
- ... unused hole ...
- ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
- ... unused hole ...
--fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI
--fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
-+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
-+fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
- ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
- ... unused hole ...
- ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
-@@ -37,7 +37,8 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
- ... unused hole ...
- ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
- ... unused hole ...
--fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
-+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
-+... unused hole ...
- ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
- ... unused hole ...
- ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
-diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
-index c92bd73b1e46..0dd48d17a4a1 100644
---- a/arch/x86/include/asm/pgtable_64_types.h
-+++ b/arch/x86/include/asm/pgtable_64_types.h
-@@ -87,7 +87,7 @@ typedef struct { pteval_t pte; } pte_t;
- # define VMALLOC_SIZE_TB      _AC(32, UL)
- # define __VMALLOC_BASE               _AC(0xffffc90000000000, UL)
- # define __VMEMMAP_BASE               _AC(0xffffea0000000000, UL)
--# define LDT_PGD_ENTRY                _AC(-4, UL)
-+# define LDT_PGD_ENTRY                _AC(-3, UL)
- # define LDT_BASE_ADDR                (LDT_PGD_ENTRY << PGDIR_SHIFT)
- #endif
- 
-@@ -109,7 +109,7 @@ typedef struct { pteval_t pte; } pte_t;
- #define ESPFIX_PGD_ENTRY      _AC(-2, UL)
- #define ESPFIX_BASE_ADDR      (ESPFIX_PGD_ENTRY << P4D_SHIFT)
- 
--#define CPU_ENTRY_AREA_PGD    _AC(-3, UL)
-+#define CPU_ENTRY_AREA_PGD    _AC(-4, UL)
- #define CPU_ENTRY_AREA_BASE   (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
- 
- #define EFI_VA_START          ( -4 * (_AC(1, UL) << 30))
-diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
-index 12b93d350480..a764bf6f3473 100644
---- a/arch/x86/mm/dump_pagetables.c
-+++ b/arch/x86/mm/dump_pagetables.c
-@@ -61,10 +61,10 @@ enum address_markers_idx {
-       KASAN_SHADOW_START_NR,
-       KASAN_SHADOW_END_NR,
- #endif
-+      CPU_ENTRY_AREA_NR,
- #if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
-       LDT_NR,
- #endif
--      CPU_ENTRY_AREA_NR,
- #ifdef CONFIG_X86_ESPFIX64
-       ESPFIX_START_NR,
- #endif
--- 
-2.14.2
-
diff --git a/patches/kernel/0239-x86-mm-Set-MODULES_END-to-0xffffffffff000000.patch b/patches/kernel/0239-x86-mm-Set-MODULES_END-to-0xffffffffff000000.patch

new file mode 100644 (file)

index 0000000..4b07728
--- /dev/null
+++ b/patches/kernel/0239-x86-mm-Set-MODULES_END-to-0xffffffffff000000.patch
@@ -0,0 +1,104 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Date: Thu, 28 Dec 2017 19:06:20 +0300
+Subject: [PATCH] x86/mm: Set MODULES_END to 0xffffffffff000000
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit f5a40711fa58f1c109165a4fec6078bf2dfd2bdc upstream.
+
+Since f06bdd4001c2 ("x86/mm: Adapt MODULES_END based on fixmap section size")
+kasan_mem_to_shadow(MODULES_END) could be not aligned to a page boundary.
+
+So passing a page-unaligned address to kasan_populate_zero_shadow() has two
+possible effects:
+
+1) It may leave one page hole in supposed to be populated area. After commit
+  21506525fb8d ("x86/kasan/64: Teach KASAN about the cpu_entry_area") that
+  hole happens to be in the shadow covering fixmap area and leads to crash:
+
+ BUG: unable to handle kernel paging request at fffffbffffe8ee04
+ RIP: 0010:check_memory_region+0x5c/0x190
+
+ Call Trace:
+  <NMI>
+  memcpy+0x1f/0x50
+  ghes_copy_tofrom_phys+0xab/0x180
+  ghes_read_estatus+0xfb/0x280
+  ghes_notify_nmi+0x2b2/0x410
+  nmi_handle+0x115/0x2c0
+  default_do_nmi+0x57/0x110
+  do_nmi+0xf8/0x150
+  end_repeat_nmi+0x1a/0x1e
+
+Note, the crash likely disappeared after commit 92a0f81d8957, which
+changed kasan_populate_zero_shadow() call the way it was before
+commit 21506525fb8d.
+
+2) Attempt to load module near MODULES_END will fail, because
+   __vmalloc_node_range() called from kasan_module_alloc() will hit the
+   WARN_ON(!pte_none(*pte)) in the vmap_pte_range() and bail out with error.
+
+To fix this we need to make kasan_mem_to_shadow(MODULES_END) page aligned
+which means that MODULES_END should be 8*PAGE_SIZE aligned.
+
+The whole point of commit f06bdd4001c2 was to move MODULES_END down if
+NR_CPUS is big, so the cpu_entry_area takes a lot of space.
+But since 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap")
+the cpu_entry_area is no longer in fixmap, so we could just set
+MODULES_END to a fixed 8*PAGE_SIZE aligned address.
+
+Fixes: f06bdd4001c2 ("x86/mm: Adapt MODULES_END based on fixmap section size")
+Reported-by: Jakub Kicinski <kubakici@wp.pl>
+Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Thomas Garnier <thgarnie@google.com>
+Link: https://lkml.kernel.org/r/20171228160620.23818-1-aryabinin@virtuozzo.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/x86/x86_64/mm.txt         | 5 +----
+ arch/x86/include/asm/pgtable_64_types.h | 2 +-
+ 2 files changed, 2 insertions(+), 5 deletions(-)
+
+diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
+index ad41b3813f0a..ddd5ffd31bd0 100644
+--- a/Documentation/x86/x86_64/mm.txt
++++ b/Documentation/x86/x86_64/mm.txt
+@@ -43,7 +43,7 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+ ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+ ... unused hole ...
+ ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
+-ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space
++ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
+ [fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
+ ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+@@ -67,9 +67,6 @@ memory window (this size is arbitrary, it can be raised later if needed).
+ The mappings are not part of any other kernel PGD and are only available
+ during EFI runtime calls.
+ 
+-The module mapping space size changes based on the CONFIG requirements for the
+-following fixmap section.
+-
+ Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
+ physical memory, vmalloc/ioremap space and virtual memory map are randomized.
+ Their order is preserved but their base will be offset early at boot time.
+diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
+index e8a809ee0bb6..c92bd73b1e46 100644
+--- a/arch/x86/include/asm/pgtable_64_types.h
++++ b/arch/x86/include/asm/pgtable_64_types.h
+@@ -103,7 +103,7 @@ typedef struct { pteval_t pte; } pte_t;
+ 
+ #define MODULES_VADDR         (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+ /* The module sections ends with the start of the fixmap */
+-#define MODULES_END           __fix_to_virt(__end_of_fixed_addresses + 1)
++#define MODULES_END           _AC(0xffffffffff000000, UL)
+ #define MODULES_LEN           (MODULES_END - MODULES_VADDR)
+ 
+ #define ESPFIX_PGD_ENTRY      _AC(-2, UL)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0240-x86-kaslr-Fix-the-vaddr_end-mess.patch b/patches/kernel/0240-x86-kaslr-Fix-the-vaddr_end-mess.patch

deleted file mode 100644 (file)

index 112e421..0000000
--- a/patches/kernel/0240-x86-kaslr-Fix-the-vaddr_end-mess.patch
+++ /dev/null
@@ -1,144 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Thu, 4 Jan 2018 12:32:03 +0100
-Subject: [PATCH] x86/kaslr: Fix the vaddr_end mess
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-commit 1dddd25125112ba49706518ac9077a1026a18f37 upstream.
-
-vaddr_end for KASLR is only documented in the KASLR code itself and is
-adjusted depending on config options. So it's not surprising that a change
-of the memory layout causes KASLR to have the wrong vaddr_end. This can map
-arbitrary stuff into other areas causing hard to understand problems.
-
-Remove the whole ifdef magic and define the start of the cpu_entry_area to
-be the end of the KASLR vaddr range.
-
-Add documentation to that effect.
-
-Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap")
-Reported-by: Benjamin Gilbert <benjamin.gilbert@coreos.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Tested-by: Benjamin Gilbert <benjamin.gilbert@coreos.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Garnier <thgarnie@google.com>,
-Cc: Alexander Kuleshov <kuleshovmail@gmail.com>
-Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801041320360.1771@nanos
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/x86/x86_64/mm.txt         |  6 ++++++
- arch/x86/include/asm/pgtable_64_types.h |  8 +++++++-
- arch/x86/mm/kaslr.c                     | 32 +++++++++-----------------------
- 3 files changed, 22 insertions(+), 24 deletions(-)
-
-diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
-index f7dabe1f01e9..ea91cb61a602 100644
---- a/Documentation/x86/x86_64/mm.txt
-+++ b/Documentation/x86/x86_64/mm.txt
-@@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
- ... unused hole ...
- ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
- ... unused hole ...
-+                                  vaddr_end for KASLR
- fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
- fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
- ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
-@@ -37,6 +38,7 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
- ... unused hole ...
- ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
- ... unused hole ...
-+                                  vaddr_end for KASLR
- fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
- ... unused hole ...
- ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
-@@ -71,3 +73,7 @@ during EFI runtime calls.
- Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
- physical memory, vmalloc/ioremap space and virtual memory map are randomized.
- Their order is preserved but their base will be offset early at boot time.
-+
-+Be very careful vs. KASLR when changing anything here. The KASLR address
-+range must not overlap with anything except the KASAN shadow area, which is
-+correct as KASAN disables KASLR.
-diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
-index 0dd48d17a4a1..928d558e7778 100644
---- a/arch/x86/include/asm/pgtable_64_types.h
-+++ b/arch/x86/include/asm/pgtable_64_types.h
-@@ -74,7 +74,13 @@ typedef struct { pteval_t pte; } pte_t;
- #define PGDIR_SIZE    (_AC(1, UL) << PGDIR_SHIFT)
- #define PGDIR_MASK    (~(PGDIR_SIZE - 1))
- 
--/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
-+/*
-+ * See Documentation/x86/x86_64/mm.txt for a description of the memory map.
-+ *
-+ * Be very careful vs. KASLR when changing anything here. The KASLR address
-+ * range must not overlap with anything except the KASAN shadow area, which
-+ * is correct as KASAN disables KASLR.
-+ */
- #define MAXMEM                        _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
- 
- #ifdef CONFIG_X86_5LEVEL
-diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
-index af599167fe3c..debc7cc8e152 100644
---- a/arch/x86/mm/kaslr.c
-+++ b/arch/x86/mm/kaslr.c
-@@ -33,25 +33,14 @@
- #define TB_SHIFT 40
- 
- /*
-- * Virtual address start and end range for randomization. The end changes base
-- * on configuration to have the highest amount of space for randomization.
-- * It increases the possible random position for each randomized region.
-+ * Virtual address start and end range for randomization.
-  *
-- * You need to add an if/def entry if you introduce a new memory region
-- * compatible with KASLR. Your entry must be in logical order with memory
-- * layout. For example, ESPFIX is before EFI because its virtual address is
-- * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
-- * ensure that this order is correct and won't be changed.
-+ * The end address could depend on more configuration options to make the
-+ * highest amount of space for randomization available, but that's too hard
-+ * to keep straight and caused issues already.
-  */
- static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
--
--#if defined(CONFIG_X86_ESPFIX64)
--static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
--#elif defined(CONFIG_EFI)
--static const unsigned long vaddr_end = EFI_VA_END;
--#else
--static const unsigned long vaddr_end = __START_KERNEL_map;
--#endif
-+static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
- 
- /* Default values */
- unsigned long page_offset_base = __PAGE_OFFSET_BASE;
-@@ -100,15 +89,12 @@ void __init kernel_randomize_memory(void)
-       unsigned long remain_entropy;
- 
-       /*
--       * All these BUILD_BUG_ON checks ensures the memory layout is
--       * consistent with the vaddr_start/vaddr_end variables.
-+       * These BUILD_BUG_ON checks ensure the memory layout is consistent
-+       * with the vaddr_start/vaddr_end variables. These checks are very
-+       * limited....
-        */
-       BUILD_BUG_ON(vaddr_start >= vaddr_end);
--      BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
--                   vaddr_end >= EFI_VA_END);
--      BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
--                    IS_ENABLED(CONFIG_EFI)) &&
--                   vaddr_end >= __START_KERNEL_map);
-+      BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
-       BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
- 
-       if (!kaslr_memory_enabled())
--- 
-2.14.2
-
diff --git a/patches/kernel/0240-x86-mm-Map-cpu_entry_area-at-the-same-place-on-4-5-l.patch b/patches/kernel/0240-x86-mm-Map-cpu_entry_area-at-the-same-place-on-4-5-l.patch

new file mode 100644 (file)

index 0000000..8f7ff69
--- /dev/null
+++ b/patches/kernel/0240-x86-mm-Map-cpu_entry_area-at-the-same-place-on-4-5-l.patch
@@ -0,0 +1,98 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 4 Jan 2018 13:01:40 +0100
+Subject: [PATCH] x86/mm: Map cpu_entry_area at the same place on 4/5 level
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit f2078904810373211fb15f91888fba14c01a4acc upstream.
+
+There is no reason for 4 and 5 level pagetables to have a different
+layout. It just makes determining vaddr_end for KASLR harder than
+necessary.
+
+Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap")
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Benjamin Gilbert <benjamin.gilbert@coreos.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Garnier <thgarnie@google.com>,
+Cc: Alexander Kuleshov <kuleshovmail@gmail.com>
+Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801041320360.1771@nanos
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/x86/x86_64/mm.txt         | 7 ++++---
+ arch/x86/include/asm/pgtable_64_types.h | 4 ++--
+ arch/x86/mm/dump_pagetables.c           | 2 +-
+ 3 files changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
+index ddd5ffd31bd0..f7dabe1f01e9 100644
+--- a/Documentation/x86/x86_64/mm.txt
++++ b/Documentation/x86/x86_64/mm.txt
+@@ -12,8 +12,8 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
+ ... unused hole ...
+ ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
+ ... unused hole ...
+-fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI
+-fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
++fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
++fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
+ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+ ... unused hole ...
+ ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+@@ -37,7 +37,8 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
+ ... unused hole ...
+ ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
+ ... unused hole ...
+-fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
++fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
++... unused hole ...
+ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+ ... unused hole ...
+ ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
+index c92bd73b1e46..0dd48d17a4a1 100644
+--- a/arch/x86/include/asm/pgtable_64_types.h
++++ b/arch/x86/include/asm/pgtable_64_types.h
+@@ -87,7 +87,7 @@ typedef struct { pteval_t pte; } pte_t;
+ # define VMALLOC_SIZE_TB      _AC(32, UL)
+ # define __VMALLOC_BASE               _AC(0xffffc90000000000, UL)
+ # define __VMEMMAP_BASE               _AC(0xffffea0000000000, UL)
+-# define LDT_PGD_ENTRY                _AC(-4, UL)
++# define LDT_PGD_ENTRY                _AC(-3, UL)
+ # define LDT_BASE_ADDR                (LDT_PGD_ENTRY << PGDIR_SHIFT)
+ #endif
+ 
+@@ -109,7 +109,7 @@ typedef struct { pteval_t pte; } pte_t;
+ #define ESPFIX_PGD_ENTRY      _AC(-2, UL)
+ #define ESPFIX_BASE_ADDR      (ESPFIX_PGD_ENTRY << P4D_SHIFT)
+ 
+-#define CPU_ENTRY_AREA_PGD    _AC(-3, UL)
++#define CPU_ENTRY_AREA_PGD    _AC(-4, UL)
+ #define CPU_ENTRY_AREA_BASE   (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
+ 
+ #define EFI_VA_START          ( -4 * (_AC(1, UL) << 30))
+diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
+index 12b93d350480..a764bf6f3473 100644
+--- a/arch/x86/mm/dump_pagetables.c
++++ b/arch/x86/mm/dump_pagetables.c
+@@ -61,10 +61,10 @@ enum address_markers_idx {
+       KASAN_SHADOW_START_NR,
+       KASAN_SHADOW_END_NR,
+ #endif
++      CPU_ENTRY_AREA_NR,
+ #if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
+       LDT_NR,
+ #endif
+-      CPU_ENTRY_AREA_NR,
+ #ifdef CONFIG_X86_ESPFIX64
+       ESPFIX_START_NR,
+ #endif
+-- 
+2.14.2
+
diff --git a/patches/kernel/0241-x86-alternatives-Add-missing-n-at-end-of-ALTERNATIVE.patch b/patches/kernel/0241-x86-alternatives-Add-missing-n-at-end-of-ALTERNATIVE.patch

deleted file mode 100644 (file)

index 79d0c2c..0000000
--- a/patches/kernel/0241-x86-alternatives-Add-missing-n-at-end-of-ALTERNATIVE.patch
+++ /dev/null
@@ -1,62 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: David Woodhouse <dwmw@amazon.co.uk>
-Date: Thu, 4 Jan 2018 14:37:05 +0000
-Subject: [PATCH] x86/alternatives: Add missing '\n' at end of ALTERNATIVE
- inline asm
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-commit b9e705ef7cfaf22db0daab91ad3cd33b0fa32eb9 upstream.
-
-Where an ALTERNATIVE is used in the middle of an inline asm block, this
-would otherwise lead to the following instruction being appended directly
-to the trailing ".popsection", and a failed compile.
-
-Fixes: 9cebed423c84 ("x86, alternative: Use .pushsection/.popsection")
-Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: gnomes@lxorguk.ukuu.org.uk
-Cc: Rik van Riel <riel@redhat.com>
-Cc: ak@linux.intel.com
-Cc: Tim Chen <tim.c.chen@linux.intel.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Paul Turner <pjt@google.com>
-Cc: Jiri Kosina <jikos@kernel.org>
-Cc: Andy Lutomirski <luto@amacapital.net>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Kees Cook <keescook@google.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
-Link: https://lkml.kernel.org/r/20180104143710.8961-8-dwmw@amazon.co.uk
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/alternative.h | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
-index d4aea31eec03..deca9b9c7923 100644
---- a/arch/x86/include/asm/alternative.h
-+++ b/arch/x86/include/asm/alternative.h
-@@ -139,7 +139,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
-       ".popsection\n"                                                 \
-       ".pushsection .altinstr_replacement, \"ax\"\n"                  \
-       ALTINSTR_REPLACEMENT(newinstr, feature, 1)                      \
--      ".popsection"
-+      ".popsection\n"
- 
- #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
-       OLDINSTR_2(oldinstr, 1, 2)                                      \
-@@ -150,7 +150,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
-       ".pushsection .altinstr_replacement, \"ax\"\n"                  \
-       ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)                    \
-       ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)                    \
--      ".popsection"
-+      ".popsection\n"
- 
- /*
-  * Alternative instructions for different CPU types or capabilities.
--- 
-2.14.2
-
diff --git a/patches/kernel/0241-x86-kaslr-Fix-the-vaddr_end-mess.patch b/patches/kernel/0241-x86-kaslr-Fix-the-vaddr_end-mess.patch

new file mode 100644 (file)

index 0000000..112e421
--- /dev/null
+++ b/patches/kernel/0241-x86-kaslr-Fix-the-vaddr_end-mess.patch
@@ -0,0 +1,144 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 4 Jan 2018 12:32:03 +0100
+Subject: [PATCH] x86/kaslr: Fix the vaddr_end mess
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 1dddd25125112ba49706518ac9077a1026a18f37 upstream.
+
+vaddr_end for KASLR is only documented in the KASLR code itself and is
+adjusted depending on config options. So it's not surprising that a change
+of the memory layout causes KASLR to have the wrong vaddr_end. This can map
+arbitrary stuff into other areas causing hard to understand problems.
+
+Remove the whole ifdef magic and define the start of the cpu_entry_area to
+be the end of the KASLR vaddr range.
+
+Add documentation to that effect.
+
+Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap")
+Reported-by: Benjamin Gilbert <benjamin.gilbert@coreos.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Benjamin Gilbert <benjamin.gilbert@coreos.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Garnier <thgarnie@google.com>
+Cc: Alexander Kuleshov <kuleshovmail@gmail.com>
+Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801041320360.1771@nanos
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/x86/x86_64/mm.txt         |  6 ++++++
+ arch/x86/include/asm/pgtable_64_types.h |  8 +++++++-
+ arch/x86/mm/kaslr.c                     | 32 +++++++++-----------------------
+ 3 files changed, 22 insertions(+), 24 deletions(-)
+
+diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
+index f7dabe1f01e9..ea91cb61a602 100644
+--- a/Documentation/x86/x86_64/mm.txt
++++ b/Documentation/x86/x86_64/mm.txt
+@@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
+ ... unused hole ...
+ ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
+ ... unused hole ...
++                                  vaddr_end for KASLR
+ fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
+ fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
+ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+@@ -37,6 +38,7 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
+ ... unused hole ...
+ ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
+ ... unused hole ...
++                                  vaddr_end for KASLR
+ fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
+ ... unused hole ...
+ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+@@ -71,3 +73,7 @@ during EFI runtime calls.
+ Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
+ physical memory, vmalloc/ioremap space and virtual memory map are randomized.
+ Their order is preserved but their base will be offset early at boot time.
++
++Be very careful vs. KASLR when changing anything here. The KASLR address
++range must not overlap with anything except the KASAN shadow area, which is
++correct as KASAN disables KASLR.
+diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
+index 0dd48d17a4a1..928d558e7778 100644
+--- a/arch/x86/include/asm/pgtable_64_types.h
++++ b/arch/x86/include/asm/pgtable_64_types.h
+@@ -74,7 +74,13 @@ typedef struct { pteval_t pte; } pte_t;
+ #define PGDIR_SIZE    (_AC(1, UL) << PGDIR_SHIFT)
+ #define PGDIR_MASK    (~(PGDIR_SIZE - 1))
+ 
+-/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
++/*
++ * See Documentation/x86/x86_64/mm.txt for a description of the memory map.
++ *
++ * Be very careful vs. KASLR when changing anything here. The KASLR address
++ * range must not overlap with anything except the KASAN shadow area, which
++ * is correct as KASAN disables KASLR.
++ */
+ #define MAXMEM                        _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+ 
+ #ifdef CONFIG_X86_5LEVEL
+diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
+index af599167fe3c..debc7cc8e152 100644
+--- a/arch/x86/mm/kaslr.c
++++ b/arch/x86/mm/kaslr.c
+@@ -33,25 +33,14 @@
+ #define TB_SHIFT 40
+ 
+ /*
+- * Virtual address start and end range for randomization. The end changes base
+- * on configuration to have the highest amount of space for randomization.
+- * It increases the possible random position for each randomized region.
++ * Virtual address start and end range for randomization.
+  *
+- * You need to add an if/def entry if you introduce a new memory region
+- * compatible with KASLR. Your entry must be in logical order with memory
+- * layout. For example, ESPFIX is before EFI because its virtual address is
+- * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
+- * ensure that this order is correct and won't be changed.
++ * The end address could depend on more configuration options to make the
++ * highest amount of space for randomization available, but that's too hard
++ * to keep straight and caused issues already.
+  */
+ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
+-
+-#if defined(CONFIG_X86_ESPFIX64)
+-static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
+-#elif defined(CONFIG_EFI)
+-static const unsigned long vaddr_end = EFI_VA_END;
+-#else
+-static const unsigned long vaddr_end = __START_KERNEL_map;
+-#endif
++static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
+ 
+ /* Default values */
+ unsigned long page_offset_base = __PAGE_OFFSET_BASE;
+@@ -100,15 +89,12 @@ void __init kernel_randomize_memory(void)
+       unsigned long remain_entropy;
+ 
+       /*
+-       * All these BUILD_BUG_ON checks ensures the memory layout is
+-       * consistent with the vaddr_start/vaddr_end variables.
++       * These BUILD_BUG_ON checks ensure the memory layout is consistent
++       * with the vaddr_start/vaddr_end variables. These checks are very
++       * limited....
+        */
+       BUILD_BUG_ON(vaddr_start >= vaddr_end);
+-      BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
+-                   vaddr_end >= EFI_VA_END);
+-      BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
+-                    IS_ENABLED(CONFIG_EFI)) &&
+-                   vaddr_end >= __START_KERNEL_map);
++      BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
+       BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
+ 
+       if (!kaslr_memory_enabled())
+-- 
+2.14.2
+
diff --git a/patches/kernel/0242-x86-alternatives-Add-missing-n-at-end-of-ALTERNATIVE.patch b/patches/kernel/0242-x86-alternatives-Add-missing-n-at-end-of-ALTERNATIVE.patch

new file mode 100644 (file)

index 0000000..79d0c2c
--- /dev/null
+++ b/patches/kernel/0242-x86-alternatives-Add-missing-n-at-end-of-ALTERNATIVE.patch
@@ -0,0 +1,62 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 4 Jan 2018 14:37:05 +0000
+Subject: [PATCH] x86/alternatives: Add missing '\n' at end of ALTERNATIVE
+ inline asm
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit b9e705ef7cfaf22db0daab91ad3cd33b0fa32eb9 upstream.
+
+Where an ALTERNATIVE is used in the middle of an inline asm block, this
+would otherwise lead to the following instruction being appended directly
+to the trailing ".popsection", and a failed compile.
+
+Fixes: 9cebed423c84 ("x86, alternative: Use .pushsection/.popsection")
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: ak@linux.intel.com
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Paul Turner <pjt@google.com>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Link: https://lkml.kernel.org/r/20180104143710.8961-8-dwmw@amazon.co.uk
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/alternative.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
+index d4aea31eec03..deca9b9c7923 100644
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -139,7 +139,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
+       ".popsection\n"                                                 \
+       ".pushsection .altinstr_replacement, \"ax\"\n"                  \
+       ALTINSTR_REPLACEMENT(newinstr, feature, 1)                      \
+-      ".popsection"
++      ".popsection\n"
+ 
+ #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
+       OLDINSTR_2(oldinstr, 1, 2)                                      \
+@@ -150,7 +150,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
+       ".pushsection .altinstr_replacement, \"ax\"\n"                  \
+       ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)                    \
+       ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)                    \
+-      ".popsection"
++      ".popsection\n"
+ 
+ /*
+  * Alternative instructions for different CPU types or capabilities.
+-- 
+2.14.2
+
diff --git a/patches/kernel/0242-x86-cpu-x86-pti-Do-not-enable-PTI-on-AMD-processors.patch b/patches/kernel/0242-x86-cpu-x86-pti-Do-not-enable-PTI-on-AMD-processors.patch

deleted file mode 100644 (file)

index 9f6c71e..0000000
--- a/patches/kernel/0242-x86-cpu-x86-pti-Do-not-enable-PTI-on-AMD-processors.patch
+++ /dev/null
@@ -1,54 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tom Lendacky <thomas.lendacky@amd.com>
-Date: Tue, 26 Dec 2017 23:43:54 -0600
-Subject: [PATCH] x86/cpu, x86/pti: Do not enable PTI on AMD processors
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-AMD processors are not subject to the types of attacks that the kernel
-page table isolation feature protects against.  The AMD microarchitecture
-does not allow memory references, including speculative references, that
-access higher privileged data when running in a lesser privileged mode
-when that access would result in a page fault.
-
-Disable page table isolation by default on AMD processors by not setting
-the X86_BUG_CPU_INSECURE feature, which controls whether X86_FEATURE_PTI
-is set.
-
-Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: stable@vger.kernel.org
-Link: https://lkml.kernel.org/r/20171227054354.20369.94587.stgit@tlendack-t1.amdoffice.net
-
-(cherry picked from commit 694d99d40972f12e59a3696effee8a376b79d7c8)
-Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
-(cherry picked from commit 9d334f48f017b9c6457c6ba321e5a53a1cc6a5c7)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/cpu/common.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 99f37d1636ff..1854dd8071a6 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -899,8 +899,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
- 
-       setup_force_cpu_cap(X86_FEATURE_ALWAYS);
- 
--      /* Assume for now that ALL x86 CPUs are insecure */
--      setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
-+      if (c->x86_vendor != X86_VENDOR_AMD)
-+              setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
- 
-       fpu__init_system(c);
- }
--- 
-2.14.2
-
diff --git a/patches/kernel/0243-x86-cpu-x86-pti-Do-not-enable-PTI-on-AMD-processors.patch b/patches/kernel/0243-x86-cpu-x86-pti-Do-not-enable-PTI-on-AMD-processors.patch

new file mode 100644 (file)

index 0000000..9f6c71e
--- /dev/null
+++ b/patches/kernel/0243-x86-cpu-x86-pti-Do-not-enable-PTI-on-AMD-processors.patch
@@ -0,0 +1,54 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Tue, 26 Dec 2017 23:43:54 -0600
+Subject: [PATCH] x86/cpu, x86/pti: Do not enable PTI on AMD processors
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+AMD processors are not subject to the types of attacks that the kernel
+page table isolation feature protects against.  The AMD microarchitecture
+does not allow memory references, including speculative references, that
+access higher privileged data when running in a lesser privileged mode
+when that access would result in a page fault.
+
+Disable page table isolation by default on AMD processors by not setting
+the X86_BUG_CPU_INSECURE feature, which controls whether X86_FEATURE_PTI
+is set.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20171227054354.20369.94587.stgit@tlendack-t1.amdoffice.net
+
+(cherry picked from commit 694d99d40972f12e59a3696effee8a376b79d7c8)
+Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
+(cherry picked from commit 9d334f48f017b9c6457c6ba321e5a53a1cc6a5c7)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/cpu/common.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 99f37d1636ff..1854dd8071a6 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -899,8 +899,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
+ 
+       setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+ 
+-      /* Assume for now that ALL x86 CPUs are insecure */
+-      setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
++      if (c->x86_vendor != X86_VENDOR_AMD)
++              setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
+ 
+       fpu__init_system(c);
+ }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0243-x86-microcode-AMD-Add-support-for-fam17h-microcode-l.patch b/patches/kernel/0243-x86-microcode-AMD-Add-support-for-fam17h-microcode-l.patch

deleted file mode 100644 (file)

index b7d44c5..0000000
--- a/patches/kernel/0243-x86-microcode-AMD-Add-support-for-fam17h-microcode-l.patch
+++ /dev/null
@@ -1,51 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tom Lendacky <thomas.lendacky@amd.com>
-Date: Thu, 30 Nov 2017 16:46:40 -0600
-Subject: [PATCH] x86/microcode/AMD: Add support for fam17h microcode loading
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-commit f4e9b7af0cd58dd039a0fb2cd67d57cea4889abf upstream.
-
-The size for the Microcode Patch Block (MPB) for an AMD family 17h
-processor is 3200 bytes.  Add a #define for fam17h so that it does
-not default to 2048 bytes and fail a microcode load/update.
-
-Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Borislav Petkov <bp@alien8.de>
-Link: https://lkml.kernel.org/r/20171130224640.15391.40247.stgit@tlendack-t1.amdoffice.net
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Cc: Alice Ferrazzi <alicef@gentoo.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/cpu/microcode/amd.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
-index 21b185793c80..248cad00fee6 100644
---- a/arch/x86/kernel/cpu/microcode/amd.c
-+++ b/arch/x86/kernel/cpu/microcode/amd.c
-@@ -467,6 +467,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
- #define F14H_MPB_MAX_SIZE 1824
- #define F15H_MPB_MAX_SIZE 4096
- #define F16H_MPB_MAX_SIZE 3458
-+#define F17H_MPB_MAX_SIZE 3200
- 
-       switch (family) {
-       case 0x14:
-@@ -478,6 +479,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
-       case 0x16:
-               max_size = F16H_MPB_MAX_SIZE;
-               break;
-+      case 0x17:
-+              max_size = F17H_MPB_MAX_SIZE;
-+              break;
-       default:
-               max_size = F1XH_MPB_MAX_SIZE;
-               break;
--- 
-2.14.2
-
diff --git a/patches/kernel/0244-Revert-scsi-libsas-allow-async-aborts.patch b/patches/kernel/0244-Revert-scsi-libsas-allow-async-aborts.patch

deleted file mode 100644 (file)

index f6b461c..0000000
--- a/patches/kernel/0244-Revert-scsi-libsas-allow-async-aborts.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
-Date: Mon, 8 Jan 2018 11:49:28 +0100
-Subject: [PATCH] Revert "scsi: libsas: allow async aborts"
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This reverts commit 909657615d9b3ce709be4fd95b9a9e8c8c7c2be6.
-
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- drivers/scsi/libsas/sas_scsi_host.c | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
-index 87e5079d816b..137fb586aa64 100644
---- a/drivers/scsi/libsas/sas_scsi_host.c
-+++ b/drivers/scsi/libsas/sas_scsi_host.c
-@@ -491,6 +491,9 @@ int sas_eh_abort_handler(struct scsi_cmnd *cmd)
-       struct Scsi_Host *host = cmd->device->host;
-       struct sas_internal *i = to_sas_internal(host->transportt);
- 
-+      if (current != host->ehandler)
-+              return FAILED;
-+
-       if (!i->dft->lldd_abort_task)
-               return FAILED;
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0244-x86-microcode-AMD-Add-support-for-fam17h-microcode-l.patch b/patches/kernel/0244-x86-microcode-AMD-Add-support-for-fam17h-microcode-l.patch

new file mode 100644 (file)

index 0000000..b7d44c5
--- /dev/null
+++ b/patches/kernel/0244-x86-microcode-AMD-Add-support-for-fam17h-microcode-l.patch
@@ -0,0 +1,51 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Thu, 30 Nov 2017 16:46:40 -0600
+Subject: [PATCH] x86/microcode/AMD: Add support for fam17h microcode loading
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit f4e9b7af0cd58dd039a0fb2cd67d57cea4889abf upstream.
+
+The size for the Microcode Patch Block (MPB) for an AMD family 17h
+processor is 3200 bytes.  Add a #define for fam17h so that it does
+not default to 2048 bytes and fail a microcode load/update.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@alien8.de>
+Link: https://lkml.kernel.org/r/20171130224640.15391.40247.stgit@tlendack-t1.amdoffice.net
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Alice Ferrazzi <alicef@gentoo.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/cpu/microcode/amd.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
+index 21b185793c80..248cad00fee6 100644
+--- a/arch/x86/kernel/cpu/microcode/amd.c
++++ b/arch/x86/kernel/cpu/microcode/amd.c
+@@ -467,6 +467,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
+ #define F14H_MPB_MAX_SIZE 1824
+ #define F15H_MPB_MAX_SIZE 4096
+ #define F16H_MPB_MAX_SIZE 3458
++#define F17H_MPB_MAX_SIZE 3200
+ 
+       switch (family) {
+       case 0x14:
+@@ -478,6 +479,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
+       case 0x16:
+               max_size = F16H_MPB_MAX_SIZE;
+               break;
++      case 0x17:
++              max_size = F17H_MPB_MAX_SIZE;
++              break;
+       default:
+               max_size = F1XH_MPB_MAX_SIZE;
+               break;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0245-Revert-scsi-libsas-allow-async-aborts.patch b/patches/kernel/0245-Revert-scsi-libsas-allow-async-aborts.patch

new file mode 100644 (file)

index 0000000..f6b461c
--- /dev/null
+++ b/patches/kernel/0245-Revert-scsi-libsas-allow-async-aborts.patch
@@ -0,0 +1,32 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
+Date: Mon, 8 Jan 2018 11:49:28 +0100
+Subject: [PATCH] Revert "scsi: libsas: allow async aborts"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This reverts commit 909657615d9b3ce709be4fd95b9a9e8c8c7c2be6.
+
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ drivers/scsi/libsas/sas_scsi_host.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
+index 87e5079d816b..137fb586aa64 100644
+--- a/drivers/scsi/libsas/sas_scsi_host.c
++++ b/drivers/scsi/libsas/sas_scsi_host.c
+@@ -491,6 +491,9 @@ int sas_eh_abort_handler(struct scsi_cmnd *cmd)
+       struct Scsi_Host *host = cmd->device->host;
+       struct sas_internal *i = to_sas_internal(host->transportt);
+ 
++      if (current != host->ehandler)
++              return FAILED;
++
+       if (!i->dft->lldd_abort_task)
+               return FAILED;
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0245-x86-pti-Make-sure-the-user-kernel-PTEs-match.patch b/patches/kernel/0245-x86-pti-Make-sure-the-user-kernel-PTEs-match.patch

deleted file mode 100644 (file)

index 9f594a1..0000000
--- a/patches/kernel/0245-x86-pti-Make-sure-the-user-kernel-PTEs-match.patch
+++ /dev/null
@@ -1,66 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 3 Jan 2018 15:57:59 +0100
-Subject: [PATCH] x86/pti: Make sure the user/kernel PTEs match
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Meelis reported that his K8 Athlon64 emits MCE warnings when PTI is
-enabled:
-
-[Hardware Error]: Error Addr: 0x0000ffff81e000e0
-[Hardware Error]: MC1 Error: L1 TLB multimatch.
-[Hardware Error]: cache level: L1, tx: INSN
-
-The address is in the entry area, which is mapped into kernel _AND_ user
-space. That's special because we switch CR3 while we are executing
-there.
-
-User mapping:
-0xffffffff81e00000-0xffffffff82000000           2M     ro         PSE     GLB x  pmd
-
-Kernel mapping:
-0xffffffff81000000-0xffffffff82000000          16M     ro         PSE         x  pmd
-
-So the K8 is complaining that the TLB entries differ. They differ in the
-GLB bit.
-
-Drop the GLB bit when installing the user shared mapping.
-
-Fixes: 6dc72c3cbca0 ("x86/mm/pti: Share entry text PMD")
-Reported-by: Meelis Roos <mroos@linux.ee>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Tested-by: Meelis Roos <mroos@linux.ee>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Tom Lendacky <thomas.lendacky@amd.com>
-Cc: stable@vger.kernel.org
-Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801031407180.1957@nanos
-(cherry picked from commit 52994c256df36fda9a715697431cba9daecb6b11)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 8a95d206afc447d8461815c67e618bd8b2c6457f)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/pti.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
-index bce8aea65606..2da28ba97508 100644
---- a/arch/x86/mm/pti.c
-+++ b/arch/x86/mm/pti.c
-@@ -367,7 +367,8 @@ static void __init pti_setup_espfix64(void)
- static void __init pti_clone_entry_text(void)
- {
-       pti_clone_pmds((unsigned long) __entry_text_start,
--                      (unsigned long) __irqentry_text_end, _PAGE_RW);
-+                      (unsigned long) __irqentry_text_end,
-+                     _PAGE_RW | _PAGE_GLOBAL);
- }
- 
- /*
--- 
-2.14.2
-
diff --git a/patches/kernel/0246-x86-dumpstack-Fix-partial-register-dumps.patch b/patches/kernel/0246-x86-dumpstack-Fix-partial-register-dumps.patch

deleted file mode 100644 (file)

index c450268..0000000
--- a/patches/kernel/0246-x86-dumpstack-Fix-partial-register-dumps.patch
+++ /dev/null
@@ -1,172 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Sun, 31 Dec 2017 10:18:06 -0600
-Subject: [PATCH] x86/dumpstack: Fix partial register dumps
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The show_regs_safe() logic is wrong.  When there's an iret stack frame,
-it prints the entire pt_regs -- most of which is random stack data --
-instead of just the five registers at the end.
-
-show_regs_safe() is also poorly named: the on_stack() checks aren't for
-safety.  Rename the function to show_regs_if_on_stack() and add a
-comment to explain why the checks are needed.
-
-These issues were introduced with the "partial register dump" feature of
-the following commit:
-
-  b02fcf9ba121 ("x86/unwinder: Handle stack overflows more gracefully")
-
-That patch had gone through a few iterations of development, and the
-above issues were artifacts from a previous iteration of the patch where
-'regs' pointed directly to the iret frame rather than to the (partially
-empty) pt_regs.
-
-Tested-by: Alexander Tsoy <alexander@tsoy.me>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: Toralf Förster <toralf.foerster@gmx.de>
-Cc: stable@vger.kernel.org
-Fixes: b02fcf9ba121 ("x86/unwinder: Handle stack overflows more gracefully")
-Link: http://lkml.kernel.org/r/5b05b8b344f59db2d3d50dbdeba92d60f2304c54.1514736742.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit a9cdbe72c4e8bf3b38781c317a79326e2e1a230d)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 3f159d02ecca1ffe81dc467767833dd6d0345147)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/unwind.h | 17 +++++++++++++----
- arch/x86/kernel/dumpstack.c   | 28 ++++++++++++++++++++--------
- arch/x86/kernel/stacktrace.c  |  2 +-
- 3 files changed, 34 insertions(+), 13 deletions(-)
-
-diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
-index 38fa6154e382..e1c1cb5019bc 100644
---- a/arch/x86/include/asm/unwind.h
-+++ b/arch/x86/include/asm/unwind.h
-@@ -55,18 +55,27 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
- 
- #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
- /*
-- * WARNING: The entire pt_regs may not be safe to dereference.  In some cases,
-- * only the iret frame registers are accessible.  Use with caution!
-+ * If 'partial' returns true, only the iret frame registers are valid.
-  */
--static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
-+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
-+                                                  bool *partial)
- {
-       if (unwind_done(state))
-               return NULL;
- 
-+      if (partial) {
-+#ifdef CONFIG_UNWINDER_ORC
-+              *partial = !state->full_regs;
-+#else
-+              *partial = false;
-+#endif
-+      }
-+
-       return state->regs;
- }
- #else
--static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
-+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
-+                                                  bool *partial)
- {
-       return NULL;
- }
-diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
-index 19a936e9b259..8da5b487919f 100644
---- a/arch/x86/kernel/dumpstack.c
-+++ b/arch/x86/kernel/dumpstack.c
-@@ -76,12 +76,23 @@ void show_iret_regs(struct pt_regs *regs)
-               regs->sp, regs->flags);
- }
- 
--static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
-+static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
-+                                bool partial)
- {
--      if (on_stack(info, regs, sizeof(*regs)))
-+      /*
-+       * These on_stack() checks aren't strictly necessary: the unwind code
-+       * has already validated the 'regs' pointer.  The checks are done for
-+       * ordering reasons: if the registers are on the next stack, we don't
-+       * want to print them out yet.  Otherwise they'll be shown as part of
-+       * the wrong stack.  Later, when show_trace_log_lvl() switches to the
-+       * next stack, this function will be called again with the same regs so
-+       * they can be printed in the right context.
-+       */
-+      if (!partial && on_stack(info, regs, sizeof(*regs))) {
-               __show_regs(regs, 0);
--      else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
--                        IRET_FRAME_SIZE)) {
-+
-+      } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
-+                                     IRET_FRAME_SIZE)) {
-               /*
-                * When an interrupt or exception occurs in entry code, the
-                * full pt_regs might not have been saved yet.  In that case
-@@ -98,6 +109,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-       struct stack_info stack_info = {0};
-       unsigned long visit_mask = 0;
-       int graph_idx = 0;
-+      bool partial;
- 
-       printk("%sCall Trace:\n", log_lvl);
- 
-@@ -140,7 +152,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                       printk("%s <%s>\n", log_lvl, stack_name);
- 
-               if (regs)
--                      show_regs_safe(&stack_info, regs);
-+                      show_regs_if_on_stack(&stack_info, regs, partial);
- 
-               /*
-                * Scan the stack, printing any text addresses we find.  At the
-@@ -164,7 +176,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- 
-                       /*
-                        * Don't print regs->ip again if it was already printed
--                       * by show_regs_safe() below.
-+                       * by show_regs_if_on_stack().
-                        */
-                       if (regs && stack == &regs->ip) {
-                               unwind_next_frame(&state);
-@@ -200,9 +212,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                       unwind_next_frame(&state);
- 
-                       /* if the frame has entry regs, print them */
--                      regs = unwind_get_entry_regs(&state);
-+                      regs = unwind_get_entry_regs(&state, &partial);
-                       if (regs)
--                              show_regs_safe(&stack_info, regs);
-+                              show_regs_if_on_stack(&stack_info, regs, partial);
-               }
- 
-               if (stack_name)
-diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
-index 8dabd7bf1673..60244bfaf88f 100644
---- a/arch/x86/kernel/stacktrace.c
-+++ b/arch/x86/kernel/stacktrace.c
-@@ -98,7 +98,7 @@ static int __save_stack_trace_reliable(struct stack_trace *trace,
-       for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
-            unwind_next_frame(&state)) {
- 
--              regs = unwind_get_entry_regs(&state);
-+              regs = unwind_get_entry_regs(&state, NULL);
-               if (regs) {
-                       /*
-                        * Kernel mode registers on the stack indicate an
--- 
-2.14.2
-
diff --git a/patches/kernel/0246-x86-pti-Make-sure-the-user-kernel-PTEs-match.patch b/patches/kernel/0246-x86-pti-Make-sure-the-user-kernel-PTEs-match.patch

new file mode 100644 (file)

index 0000000..9f594a1
--- /dev/null
+++ b/patches/kernel/0246-x86-pti-Make-sure-the-user-kernel-PTEs-match.patch
@@ -0,0 +1,66 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 3 Jan 2018 15:57:59 +0100
+Subject: [PATCH] x86/pti: Make sure the user/kernel PTEs match
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Meelis reported that his K8 Athlon64 emits MCE warnings when PTI is
+enabled:
+
+[Hardware Error]: Error Addr: 0x0000ffff81e000e0
+[Hardware Error]: MC1 Error: L1 TLB multimatch.
+[Hardware Error]: cache level: L1, tx: INSN
+
+The address is in the entry area, which is mapped into kernel _AND_ user
+space. That's special because we switch CR3 while we are executing
+there.
+
+User mapping:
+0xffffffff81e00000-0xffffffff82000000           2M     ro         PSE     GLB x  pmd
+
+Kernel mapping:
+0xffffffff81000000-0xffffffff82000000          16M     ro         PSE         x  pmd
+
+So the K8 is complaining that the TLB entries differ. They differ in the
+GLB bit.
+
+Drop the GLB bit when installing the user shared mapping.
+
+Fixes: 6dc72c3cbca0 ("x86/mm/pti: Share entry text PMD")
+Reported-by: Meelis Roos <mroos@linux.ee>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Meelis Roos <mroos@linux.ee>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801031407180.1957@nanos
+(cherry picked from commit 52994c256df36fda9a715697431cba9daecb6b11)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 8a95d206afc447d8461815c67e618bd8b2c6457f)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/pti.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
+index bce8aea65606..2da28ba97508 100644
+--- a/arch/x86/mm/pti.c
++++ b/arch/x86/mm/pti.c
+@@ -367,7 +367,8 @@ static void __init pti_setup_espfix64(void)
+ static void __init pti_clone_entry_text(void)
+ {
+       pti_clone_pmds((unsigned long) __entry_text_start,
+-                      (unsigned long) __irqentry_text_end, _PAGE_RW);
++                      (unsigned long) __irqentry_text_end,
++                     _PAGE_RW | _PAGE_GLOBAL);
+ }
+ 
+ /*
+-- 
+2.14.2
+
diff --git a/patches/kernel/0247-x86-dumpstack-Fix-partial-register-dumps.patch b/patches/kernel/0247-x86-dumpstack-Fix-partial-register-dumps.patch

new file mode 100644 (file)

index 0000000..c450268
--- /dev/null
+++ b/patches/kernel/0247-x86-dumpstack-Fix-partial-register-dumps.patch
@@ -0,0 +1,172 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Sun, 31 Dec 2017 10:18:06 -0600
+Subject: [PATCH] x86/dumpstack: Fix partial register dumps
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The show_regs_safe() logic is wrong.  When there's an iret stack frame,
+it prints the entire pt_regs -- most of which is random stack data --
+instead of just the five registers at the end.
+
+show_regs_safe() is also poorly named: the on_stack() checks aren't for
+safety.  Rename the function to show_regs_if_on_stack() and add a
+comment to explain why the checks are needed.
+
+These issues were introduced with the "partial register dump" feature of
+the following commit:
+
+  b02fcf9ba121 ("x86/unwinder: Handle stack overflows more gracefully")
+
+That patch had gone through a few iterations of development, and the
+above issues were artifacts from a previous iteration of the patch where
+'regs' pointed directly to the iret frame rather than to the (partially
+empty) pt_regs.
+
+Tested-by: Alexander Tsoy <alexander@tsoy.me>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Toralf Förster <toralf.foerster@gmx.de>
+Cc: stable@vger.kernel.org
+Fixes: b02fcf9ba121 ("x86/unwinder: Handle stack overflows more gracefully")
+Link: http://lkml.kernel.org/r/5b05b8b344f59db2d3d50dbdeba92d60f2304c54.1514736742.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit a9cdbe72c4e8bf3b38781c317a79326e2e1a230d)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 3f159d02ecca1ffe81dc467767833dd6d0345147)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/unwind.h | 17 +++++++++++++----
+ arch/x86/kernel/dumpstack.c   | 28 ++++++++++++++++++++--------
+ arch/x86/kernel/stacktrace.c  |  2 +-
+ 3 files changed, 34 insertions(+), 13 deletions(-)
+
+diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
+index 38fa6154e382..e1c1cb5019bc 100644
+--- a/arch/x86/include/asm/unwind.h
++++ b/arch/x86/include/asm/unwind.h
+@@ -55,18 +55,27 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
+ 
+ #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
+ /*
+- * WARNING: The entire pt_regs may not be safe to dereference.  In some cases,
+- * only the iret frame registers are accessible.  Use with caution!
++ * If 'partial' returns true, only the iret frame registers are valid.
+  */
+-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
++static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
++                                                  bool *partial)
+ {
+       if (unwind_done(state))
+               return NULL;
+ 
++      if (partial) {
++#ifdef CONFIG_UNWINDER_ORC
++              *partial = !state->full_regs;
++#else
++              *partial = false;
++#endif
++      }
++
+       return state->regs;
+ }
+ #else
+-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
++static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
++                                                  bool *partial)
+ {
+       return NULL;
+ }
+diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
+index 19a936e9b259..8da5b487919f 100644
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -76,12 +76,23 @@ void show_iret_regs(struct pt_regs *regs)
+               regs->sp, regs->flags);
+ }
+ 
+-static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
++static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
++                                bool partial)
+ {
+-      if (on_stack(info, regs, sizeof(*regs)))
++      /*
++       * These on_stack() checks aren't strictly necessary: the unwind code
++       * has already validated the 'regs' pointer.  The checks are done for
++       * ordering reasons: if the registers are on the next stack, we don't
++       * want to print them out yet.  Otherwise they'll be shown as part of
++       * the wrong stack.  Later, when show_trace_log_lvl() switches to the
++       * next stack, this function will be called again with the same regs so
++       * they can be printed in the right context.
++       */
++      if (!partial && on_stack(info, regs, sizeof(*regs))) {
+               __show_regs(regs, 0);
+-      else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
+-                        IRET_FRAME_SIZE)) {
++
++      } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
++                                     IRET_FRAME_SIZE)) {
+               /*
+                * When an interrupt or exception occurs in entry code, the
+                * full pt_regs might not have been saved yet.  In that case
+@@ -98,6 +109,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+       struct stack_info stack_info = {0};
+       unsigned long visit_mask = 0;
+       int graph_idx = 0;
++      bool partial;
+ 
+       printk("%sCall Trace:\n", log_lvl);
+ 
+@@ -140,7 +152,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+                       printk("%s <%s>\n", log_lvl, stack_name);
+ 
+               if (regs)
+-                      show_regs_safe(&stack_info, regs);
++                      show_regs_if_on_stack(&stack_info, regs, partial);
+ 
+               /*
+                * Scan the stack, printing any text addresses we find.  At the
+@@ -164,7 +176,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ 
+                       /*
+                        * Don't print regs->ip again if it was already printed
+-                       * by show_regs_safe() below.
++                       * by show_regs_if_on_stack().
+                        */
+                       if (regs && stack == &regs->ip) {
+                               unwind_next_frame(&state);
+@@ -200,9 +212,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+                       unwind_next_frame(&state);
+ 
+                       /* if the frame has entry regs, print them */
+-                      regs = unwind_get_entry_regs(&state);
++                      regs = unwind_get_entry_regs(&state, &partial);
+                       if (regs)
+-                              show_regs_safe(&stack_info, regs);
++                              show_regs_if_on_stack(&stack_info, regs, partial);
+               }
+ 
+               if (stack_name)
+diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
+index 8dabd7bf1673..60244bfaf88f 100644
+--- a/arch/x86/kernel/stacktrace.c
++++ b/arch/x86/kernel/stacktrace.c
+@@ -98,7 +98,7 @@ static int __save_stack_trace_reliable(struct stack_trace *trace,
+       for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
+            unwind_next_frame(&state)) {
+ 
+-              regs = unwind_get_entry_regs(&state);
++              regs = unwind_get_entry_regs(&state, NULL);
+               if (regs) {
+                       /*
+                        * Kernel mode registers on the stack indicate an
+-- 
+2.14.2
+
diff --git a/patches/kernel/0247-x86-dumpstack-Print-registers-for-first-stack-frame.patch b/patches/kernel/0247-x86-dumpstack-Print-registers-for-first-stack-frame.patch

deleted file mode 100644 (file)

index 26b5a60..0000000
--- a/patches/kernel/0247-x86-dumpstack-Print-registers-for-first-stack-frame.patch
+++ /dev/null
@@ -1,58 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Sun, 31 Dec 2017 10:18:07 -0600
-Subject: [PATCH] x86/dumpstack: Print registers for first stack frame
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-In the stack dump code, if the frame after the starting pt_regs is also
-a regs frame, the registers don't get printed.  Fix that.
-
-Reported-by: Andy Lutomirski <luto@amacapital.net>
-Tested-by: Alexander Tsoy <alexander@tsoy.me>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: Toralf Förster <toralf.foerster@gmx.de>
-Cc: stable@vger.kernel.org
-Fixes: 3b3fa11bc700 ("x86/dumpstack: Print any pt_regs found on the stack")
-Link: http://lkml.kernel.org/r/396f84491d2f0ef64eda4217a2165f5712f6a115.1514736742.git.jpoimboe@redhat.com
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-(cherry picked from commit 3ffdeb1a02be3086f1411a15c5b9c481fa28e21f)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 3aef1ce621ae2eb0bd58e07cf9e66a859faa17cd)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/dumpstack.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
-index 8da5b487919f..042f80c50e3b 100644
---- a/arch/x86/kernel/dumpstack.c
-+++ b/arch/x86/kernel/dumpstack.c
-@@ -115,6 +115,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- 
-       unwind_start(&state, task, regs, stack);
-       stack = stack ? : get_stack_pointer(task, regs);
-+      regs = unwind_get_entry_regs(&state, &partial);
- 
-       /*
-        * Iterate through the stacks, starting with the current stack pointer.
-@@ -132,7 +133,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-        * - hardirq stack
-        * - entry stack
-        */
--      for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
-+      for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
-               const char *stack_name;
- 
-               if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
--- 
-2.14.2
-
diff --git a/patches/kernel/0248-x86-dumpstack-Print-registers-for-first-stack-frame.patch b/patches/kernel/0248-x86-dumpstack-Print-registers-for-first-stack-frame.patch

new file mode 100644 (file)

index 0000000..26b5a60
--- /dev/null
+++ b/patches/kernel/0248-x86-dumpstack-Print-registers-for-first-stack-frame.patch
@@ -0,0 +1,58 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Sun, 31 Dec 2017 10:18:07 -0600
+Subject: [PATCH] x86/dumpstack: Print registers for first stack frame
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+In the stack dump code, if the frame after the starting pt_regs is also
+a regs frame, the registers don't get printed.  Fix that.
+
+Reported-by: Andy Lutomirski <luto@amacapital.net>
+Tested-by: Alexander Tsoy <alexander@tsoy.me>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Toralf Förster <toralf.foerster@gmx.de>
+Cc: stable@vger.kernel.org
+Fixes: 3b3fa11bc700 ("x86/dumpstack: Print any pt_regs found on the stack")
+Link: http://lkml.kernel.org/r/396f84491d2f0ef64eda4217a2165f5712f6a115.1514736742.git.jpoimboe@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+(cherry picked from commit 3ffdeb1a02be3086f1411a15c5b9c481fa28e21f)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 3aef1ce621ae2eb0bd58e07cf9e66a859faa17cd)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/dumpstack.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
+index 8da5b487919f..042f80c50e3b 100644
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -115,6 +115,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ 
+       unwind_start(&state, task, regs, stack);
+       stack = stack ? : get_stack_pointer(task, regs);
++      regs = unwind_get_entry_regs(&state, &partial);
+ 
+       /*
+        * Iterate through the stacks, starting with the current stack pointer.
+@@ -132,7 +133,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+        * - hardirq stack
+        * - entry stack
+        */
+-      for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
++      for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+               const char *stack_name;
+ 
+               if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
+-- 
+2.14.2
+
diff --git a/patches/kernel/0248-x86-process-Define-cpu_tss_rw-in-same-section-as-dec.patch b/patches/kernel/0248-x86-process-Define-cpu_tss_rw-in-same-section-as-dec.patch

deleted file mode 100644 (file)

index 24c5a1b..0000000
--- a/patches/kernel/0248-x86-process-Define-cpu_tss_rw-in-same-section-as-dec.patch
+++ /dev/null
@@ -1,64 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Nick Desaulniers <ndesaulniers@google.com>
-Date: Wed, 3 Jan 2018 12:39:52 -0800
-Subject: [PATCH] x86/process: Define cpu_tss_rw in same section as declaration
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-cpu_tss_rw is declared with DECLARE_PER_CPU_PAGE_ALIGNED
-but then defined with DEFINE_PER_CPU_SHARED_ALIGNED
-leading to section mismatch warnings.
-
-Use DEFINE_PER_CPU_PAGE_ALIGNED consistently. This is necessary because
-it's mapped to the cpu entry area and must be page aligned.
-
-[ tglx: Massaged changelog a bit ]
-
-Fixes: 1a935bc3d4ea ("x86/entry: Move SYSENTER_stack to the beginning of struct tss_struct")
-Suggested-by: Thomas Gleixner <tglx@linutronix.de>
-Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: thomas.lendacky@amd.com
-Cc: Borislav Petkov <bpetkov@suse.de>
-Cc: tklauser@distanz.ch
-Cc: minipli@googlemail.com
-Cc: me@kylehuey.com
-Cc: namit@vmware.com
-Cc: luto@kernel.org
-Cc: jpoimboe@redhat.com
-Cc: tj@kernel.org
-Cc: cl@linux.com
-Cc: bp@suse.de
-Cc: thgarnie@google.com
-Cc: kirill.shutemov@linux.intel.com
-Cc: stable@vger.kernel.org
-Link: https://lkml.kernel.org/r/20180103203954.183360-1-ndesaulniers@google.com
-
-(cherry picked from commit 2fd9c41aea47f4ad071accf94b94f94f2c4d31eb)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit f45e574914ae47825d2eea46abc9d6fbabe55e56)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/process.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
-index 3688a7b9d055..07e6218ad7d9 100644
---- a/arch/x86/kernel/process.c
-+++ b/arch/x86/kernel/process.c
-@@ -46,7 +46,7 @@
-  * section. Since TSS's are completely CPU-local, we want them
-  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
-  */
--__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
-+__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
-       .x86_tss = {
-               /*
-                * .sp0 is only used when entering ring 0 from a lower
--- 
-2.14.2
-
diff --git a/patches/kernel/0249-x86-process-Define-cpu_tss_rw-in-same-section-as-dec.patch b/patches/kernel/0249-x86-process-Define-cpu_tss_rw-in-same-section-as-dec.patch

new file mode 100644 (file)

index 0000000..24c5a1b
--- /dev/null
+++ b/patches/kernel/0249-x86-process-Define-cpu_tss_rw-in-same-section-as-dec.patch
@@ -0,0 +1,64 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Nick Desaulniers <ndesaulniers@google.com>
+Date: Wed, 3 Jan 2018 12:39:52 -0800
+Subject: [PATCH] x86/process: Define cpu_tss_rw in same section as declaration
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+cpu_tss_rw is declared with DECLARE_PER_CPU_PAGE_ALIGNED
+but then defined with DEFINE_PER_CPU_SHARED_ALIGNED
+leading to section mismatch warnings.
+
+Use DEFINE_PER_CPU_PAGE_ALIGNED consistently. This is necessary because
+it's mapped to the cpu entry area and must be page aligned.
+
+[ tglx: Massaged changelog a bit ]
+
+Fixes: 1a935bc3d4ea ("x86/entry: Move SYSENTER_stack to the beginning of struct tss_struct")
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: thomas.lendacky@amd.com
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: tklauser@distanz.ch
+Cc: minipli@googlemail.com
+Cc: me@kylehuey.com
+Cc: namit@vmware.com
+Cc: luto@kernel.org
+Cc: jpoimboe@redhat.com
+Cc: tj@kernel.org
+Cc: cl@linux.com
+Cc: bp@suse.de
+Cc: thgarnie@google.com
+Cc: kirill.shutemov@linux.intel.com
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20180103203954.183360-1-ndesaulniers@google.com
+
+(cherry picked from commit 2fd9c41aea47f4ad071accf94b94f94f2c4d31eb)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit f45e574914ae47825d2eea46abc9d6fbabe55e56)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/process.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
+index 3688a7b9d055..07e6218ad7d9 100644
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -46,7 +46,7 @@
+  * section. Since TSS's are completely CPU-local, we want them
+  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+  */
+-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
++__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
+       .x86_tss = {
+               /*
+                * .sp0 is only used when entering ring 0 from a lower
+-- 
+2.14.2
+
diff --git a/patches/kernel/0249-x86-pti-Rename-BUG_CPU_INSECURE-to-BUG_CPU_MELTDOWN.patch b/patches/kernel/0249-x86-pti-Rename-BUG_CPU_INSECURE-to-BUG_CPU_MELTDOWN.patch

deleted file mode 100644 (file)

index f8049c8..0000000
--- a/patches/kernel/0249-x86-pti-Rename-BUG_CPU_INSECURE-to-BUG_CPU_MELTDOWN.patch
+++ /dev/null
@@ -1,98 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Fri, 5 Jan 2018 15:27:34 +0100
-Subject: [PATCH] x86/pti: Rename BUG_CPU_INSECURE to BUG_CPU_MELTDOWN
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Use the name associated with the particular attack which needs page table
-isolation for mitigation.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Acked-by: David Woodhouse <dwmw@amazon.co.uk>
-Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
-Cc: Jiri Koshina <jikos@kernel.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Tim Chen <tim.c.chen@linux.intel.com>
-Cc: Andi Lutomirski  <luto@amacapital.net>
-Cc: Andi Kleen <ak@linux.intel.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Paul Turner <pjt@google.com>
-Cc: Tom Lendacky <thomas.lendacky@amd.com>
-Cc: Greg KH <gregkh@linux-foundation.org>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Kees Cook <keescook@google.com>
-Cc: stable@vger.kernel.org
-Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801051525300.1724@nanos
-
-(cherry picked from commit de791821c295cc61419a06fe5562288417d1bc58)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit aefb6725ee33758a2869c37e22dbc7ca80548007)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/cpufeatures.h | 2 +-
- arch/x86/kernel/cpu/common.c       | 2 +-
- arch/x86/mm/pti.c                  | 6 +++---
- 3 files changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
-index 9b0c283afcf0..b7900d26066c 100644
---- a/arch/x86/include/asm/cpufeatures.h
-+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -340,6 +340,6 @@
- #define X86_BUG_SWAPGS_FENCE          X86_BUG(11) /* SWAPGS without input dep on GS */
- #define X86_BUG_MONITOR                       X86_BUG(12) /* IPI required to wake up remote CPU */
- #define X86_BUG_AMD_E400              X86_BUG(13) /* CPU is among the affected by Erratum 400 */
--#define X86_BUG_CPU_INSECURE          X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
-+#define X86_BUG_CPU_MELTDOWN          X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
- 
- #endif /* _ASM_X86_CPUFEATURES_H */
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 1854dd8071a6..142ab555dafa 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -900,7 +900,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
-       setup_force_cpu_cap(X86_FEATURE_ALWAYS);
- 
-       if (c->x86_vendor != X86_VENDOR_AMD)
--              setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
-+              setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
- 
-       fpu__init_system(c);
- }
-diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
-index 2da28ba97508..43d4a4a29037 100644
---- a/arch/x86/mm/pti.c
-+++ b/arch/x86/mm/pti.c
-@@ -56,13 +56,13 @@
- 
- static void __init pti_print_if_insecure(const char *reason)
- {
--      if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
-+      if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
-               pr_info("%s\n", reason);
- }
- 
- static void __init pti_print_if_secure(const char *reason)
- {
--      if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
-+      if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
-               pr_info("%s\n", reason);
- }
- 
-@@ -96,7 +96,7 @@ void __init pti_check_boottime_disable(void)
-       }
- 
- autosel:
--      if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
-+      if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
-               return;
- enable:
-       setup_force_cpu_cap(X86_FEATURE_PTI);
--- 
-2.14.2
-
diff --git a/patches/kernel/0250-x86-pti-Rename-BUG_CPU_INSECURE-to-BUG_CPU_MELTDOWN.patch b/patches/kernel/0250-x86-pti-Rename-BUG_CPU_INSECURE-to-BUG_CPU_MELTDOWN.patch

new file mode 100644 (file)

index 0000000..f8049c8
--- /dev/null
+++ b/patches/kernel/0250-x86-pti-Rename-BUG_CPU_INSECURE-to-BUG_CPU_MELTDOWN.patch
@@ -0,0 +1,98 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 5 Jan 2018 15:27:34 +0100
+Subject: [PATCH] x86/pti: Rename BUG_CPU_INSECURE to BUG_CPU_MELTDOWN
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Use the name associated with the particular attack which needs page table
+isolation for mitigation.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Paul Turner <pjt@google.com>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Greg KH <gregkh@linux-foundation.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801051525300.1724@nanos
+
+(cherry picked from commit de791821c295cc61419a06fe5562288417d1bc58)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit aefb6725ee33758a2869c37e22dbc7ca80548007)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/cpufeatures.h | 2 +-
+ arch/x86/kernel/cpu/common.c       | 2 +-
+ arch/x86/mm/pti.c                  | 6 +++---
+ 3 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 9b0c283afcf0..b7900d26066c 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -340,6 +340,6 @@
+ #define X86_BUG_SWAPGS_FENCE          X86_BUG(11) /* SWAPGS without input dep on GS */
+ #define X86_BUG_MONITOR                       X86_BUG(12) /* IPI required to wake up remote CPU */
+ #define X86_BUG_AMD_E400              X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+-#define X86_BUG_CPU_INSECURE          X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
++#define X86_BUG_CPU_MELTDOWN          X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+ 
+ #endif /* _ASM_X86_CPUFEATURES_H */
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 1854dd8071a6..142ab555dafa 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -900,7 +900,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
+       setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+ 
+       if (c->x86_vendor != X86_VENDOR_AMD)
+-              setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
++              setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ 
+       fpu__init_system(c);
+ }
+diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
+index 2da28ba97508..43d4a4a29037 100644
+--- a/arch/x86/mm/pti.c
++++ b/arch/x86/mm/pti.c
+@@ -56,13 +56,13 @@
+ 
+ static void __init pti_print_if_insecure(const char *reason)
+ {
+-      if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
++      if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+               pr_info("%s\n", reason);
+ }
+ 
+ static void __init pti_print_if_secure(const char *reason)
+ {
+-      if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
++      if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+               pr_info("%s\n", reason);
+ }
+ 
+@@ -96,7 +96,7 @@ void __init pti_check_boottime_disable(void)
+       }
+ 
+ autosel:
+-      if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
++      if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+               return;
+ enable:
+       setup_force_cpu_cap(X86_FEATURE_PTI);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0250-x86-pti-Unbreak-EFI-old_memmap.patch b/patches/kernel/0250-x86-pti-Unbreak-EFI-old_memmap.patch

deleted file mode 100644 (file)

index 0aca0ec..0000000
--- a/patches/kernel/0250-x86-pti-Unbreak-EFI-old_memmap.patch
+++ /dev/null
@@ -1,61 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Jiri Kosina <jkosina@suse.cz>
-Date: Fri, 5 Jan 2018 22:35:41 +0100
-Subject: [PATCH] x86/pti: Unbreak EFI old_memmap
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-EFI_OLD_MEMMAP's efi_call_phys_prolog() calls set_pgd() with swapper PGD that
-has PAGE_USER set, which makes PTI set NX on it, and therefore EFI can't
-execute it's code.
-
-Fix that by forcefully clearing _PAGE_NX from the PGD (this can't be done
-by the pgprot API).
-
-_PAGE_NX will be automatically reintroduced in efi_call_phys_epilog(), as
-_set_pgd() will again notice that this is _PAGE_USER, and set _PAGE_NX on
-it.
-
-Tested-by: Dimitri Sivanich <sivanich@hpe.com>
-Signed-off-by: Jiri Kosina <jkosina@suse.cz>
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
-Cc: Andrea Arcangeli <aarcange@redhat.com>
-Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Matt Fleming <matt@codeblueprint.co.uk>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-efi@vger.kernel.org
-Cc: stable@vger.kernel.org
-Link: http://lkml.kernel.org/r/nycvar.YFH.7.76.1801052215460.11852@cbobk.fhfr.pm
-(cherry picked from commit de53c3786a3ce162a1c815d0c04c766c23ec9c0a)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 31afacd8089f54061e718e5d491f11747755c503)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/platform/efi/efi_64.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
-index b104224d3d6c..987a38e82f73 100644
---- a/arch/x86/platform/efi/efi_64.c
-+++ b/arch/x86/platform/efi/efi_64.c
-@@ -133,7 +133,9 @@ pgd_t * __init efi_call_phys_prolog(void)
-                               pud[j] = *pud_offset(p4d_k, vaddr);
-                       }
-               }
-+              pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
-       }
-+
- out:
-       __flush_tlb_all();
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0251-x86-Documentation-Add-PTI-description.patch b/patches/kernel/0251-x86-Documentation-Add-PTI-description.patch

deleted file mode 100644 (file)

index 6a38eaa..0000000
--- a/patches/kernel/0251-x86-Documentation-Add-PTI-description.patch
+++ /dev/null
@@ -1,275 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Fri, 5 Jan 2018 09:44:36 -0800
-Subject: [PATCH] x86/Documentation: Add PTI description
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Add some details about how PTI works, what some of the downsides
-are, and how to debug it when things go wrong.
-
-Also document the kernel parameter: 'pti/nopti'.
-
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
-Reviewed-by: Kees Cook <keescook@chromium.org>
-Cc: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
-Cc: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
-Cc: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
-Cc: Richard Fellner <richard.fellner@student.tugraz.at>
-Cc: Andy Lutomirski <luto@kernel.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Hugh Dickins <hughd@google.com>
-Cc: Andi Lutomirsky <luto@kernel.org>
-Cc: stable@vger.kernel.org
-Link: https://lkml.kernel.org/r/20180105174436.1BC6FA2B@viggo.jf.intel.com
-
-(cherry picked from commit 01c9b17bf673b05bb401b76ec763e9730ccf1376)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 1acf87c45b0170e717fc1b06a2d6fef47e07f79b)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/admin-guide/kernel-parameters.txt |  21 ++-
- Documentation/x86/pti.txt                       | 186 ++++++++++++++++++++++++
- 2 files changed, 200 insertions(+), 7 deletions(-)
- create mode 100644 Documentation/x86/pti.txt
-
-diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
-index b4d2edf316db..1a6ebc6cdf26 100644
---- a/Documentation/admin-guide/kernel-parameters.txt
-+++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -2677,8 +2677,6 @@
-                       steal time is computed, but won't influence scheduler
-                       behaviour
- 
--      nopti           [X86-64] Disable kernel page table isolation
--
-       nolapic         [X86-32,APIC] Do not enable or use the local APIC.
- 
-       nolapic_timer   [X86-32,APIC] Do not use the local APIC timer.
-@@ -3247,11 +3245,20 @@
-       pt.             [PARIDE]
-                       See Documentation/blockdev/paride.txt.
- 
--      pti=            [X86_64]
--                      Control user/kernel address space isolation:
--                      on - enable
--                      off - disable
--                      auto - default setting
-+      pti=            [X86_64] Control Page Table Isolation of user and
-+                      kernel address spaces.  Disabling this feature
-+                      removes hardening, but improves performance of
-+                      system calls and interrupts.
-+
-+                      on   - unconditionally enable
-+                      off  - unconditionally disable
-+                      auto - kernel detects whether your CPU model is
-+                             vulnerable to issues that PTI mitigates
-+
-+                      Not specifying this option is equivalent to pti=auto.
-+
-+      nopti           [X86_64]
-+                      Equivalent to pti=off
- 
-       pty.legacy_count=
-                       [KNL] Number of legacy pty's. Overwrites compiled-in
-diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
-new file mode 100644
-index 000000000000..d11eff61fc9a
---- /dev/null
-+++ b/Documentation/x86/pti.txt
-@@ -0,0 +1,186 @@
-+Overview
-+========
-+
-+Page Table Isolation (pti, previously known as KAISER[1]) is a
-+countermeasure against attacks on the shared user/kernel address
-+space such as the "Meltdown" approach[2].
-+
-+To mitigate this class of attacks, we create an independent set of
-+page tables for use only when running userspace applications.  When
-+the kernel is entered via syscalls, interrupts or exceptions, the
-+page tables are switched to the full "kernel" copy.  When the system
-+switches back to user mode, the user copy is used again.
-+
-+The userspace page tables contain only a minimal amount of kernel
-+data: only what is needed to enter/exit the kernel such as the
-+entry/exit functions themselves and the interrupt descriptor table
-+(IDT).  There are a few strictly unnecessary things that get mapped
-+such as the first C function when entering an interrupt (see
-+comments in pti.c).
-+
-+This approach helps to ensure that side-channel attacks leveraging
-+the paging structures do not function when PTI is enabled.  It can be
-+enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
-+Once enabled at compile-time, it can be disabled at boot with the
-+'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
-+
-+Page Table Management
-+=====================
-+
-+When PTI is enabled, the kernel manages two sets of page tables.
-+The first set is very similar to the single set which is present in
-+kernels without PTI.  This includes a complete mapping of userspace
-+that the kernel can use for things like copy_to_user().
-+
-+Although _complete_, the user portion of the kernel page tables is
-+crippled by setting the NX bit in the top level.  This ensures
-+that any missed kernel->user CR3 switch will immediately crash
-+userspace upon executing its first instruction.
-+
-+The userspace page tables map only the kernel data needed to enter
-+and exit the kernel.  This data is entirely contained in the 'struct
-+cpu_entry_area' structure which is placed in the fixmap which gives
-+each CPU's copy of the area a compile-time-fixed virtual address.
-+
-+For new userspace mappings, the kernel makes the entries in its
-+page tables like normal.  The only difference is when the kernel
-+makes entries in the top (PGD) level.  In addition to setting the
-+entry in the main kernel PGD, a copy of the entry is made in the
-+userspace page tables' PGD.
-+
-+This sharing at the PGD level also inherently shares all the lower
-+layers of the page tables.  This leaves a single, shared set of
-+userspace page tables to manage.  One PTE to lock, one set of
-+accessed bits, dirty bits, etc...
-+
-+Overhead
-+========
-+
-+Protection against side-channel attacks is important.  But,
-+this protection comes at a cost:
-+
-+1. Increased Memory Use
-+  a. Each process now needs an order-1 PGD instead of order-0.
-+     (Consumes an additional 4k per process).
-+  b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
-+     aligned so that it can be mapped by setting a single PMD
-+     entry.  This consumes nearly 2MB of RAM once the kernel
-+     is decompressed, but no space in the kernel image itself.
-+
-+2. Runtime Cost
-+  a. CR3 manipulation to switch between the page table copies
-+     must be done at interrupt, syscall, and exception entry
-+     and exit (it can be skipped when the kernel is interrupted,
-+     though.)  Moves to CR3 are on the order of a hundred
-+     cycles, and are required at every entry and exit.
-+  b. A "trampoline" must be used for SYSCALL entry.  This
-+     trampoline depends on a smaller set of resources than the
-+     non-PTI SYSCALL entry code, so requires mapping fewer
-+     things into the userspace page tables.  The downside is
-+     that stacks must be switched at entry time.
-+  d. Global pages are disabled for all kernel structures not
-+     mapped into both kernel and userspace page tables.  This
-+     feature of the MMU allows different processes to share TLB
-+     entries mapping the kernel.  Losing the feature means more
-+     TLB misses after a context switch.  The actual loss of
-+     performance is very small, however, never exceeding 1%.
-+  d. Process Context IDentifiers (PCID) is a CPU feature that
-+     allows us to skip flushing the entire TLB when switching page
-+     tables by setting a special bit in CR3 when the page tables
-+     are changed.  This makes switching the page tables (at context
-+     switch, or kernel entry/exit) cheaper.  But, on systems with
-+     PCID support, the context switch code must flush both the user
-+     and kernel entries out of the TLB.  The user PCID TLB flush is
-+     deferred until the exit to userspace, minimizing the cost.
-+     See intel.com/sdm for the gory PCID/INVPCID details.
-+  e. The userspace page tables must be populated for each new
-+     process.  Even without PTI, the shared kernel mappings
-+     are created by copying top-level (PGD) entries into each
-+     new process.  But, with PTI, there are now *two* kernel
-+     mappings: one in the kernel page tables that maps everything
-+     and one for the entry/exit structures.  At fork(), we need to
-+     copy both.
-+  f. In addition to the fork()-time copying, there must also
-+     be an update to the userspace PGD any time a set_pgd() is done
-+     on a PGD used to map userspace.  This ensures that the kernel
-+     and userspace copies always map the same userspace
-+     memory.
-+  g. On systems without PCID support, each CR3 write flushes
-+     the entire TLB.  That means that each syscall, interrupt
-+     or exception flushes the TLB.
-+  h. INVPCID is a TLB-flushing instruction which allows flushing
-+     of TLB entries for non-current PCIDs.  Some systems support
-+     PCIDs, but do not support INVPCID.  On these systems, addresses
-+     can only be flushed from the TLB for the current PCID.  When
-+     flushing a kernel address, we need to flush all PCIDs, so a
-+     single kernel address flush will require a TLB-flushing CR3
-+     write upon the next use of every PCID.
-+
-+Possible Future Work
-+====================
-+1. We can be more careful about not actually writing to CR3
-+   unless its value is actually changed.
-+2. Allow PTI to be enabled/disabled at runtime in addition to the
-+   boot-time switching.
-+
-+Testing
-+========
-+
-+To test stability of PTI, the following test procedure is recommended,
-+ideally doing all of these in parallel:
-+
-+1. Set CONFIG_DEBUG_ENTRY=y
-+2. Run several copies of all of the tools/testing/selftests/x86/ tests
-+   (excluding MPX and protection_keys) in a loop on multiple CPUs for
-+   several minutes.  These tests frequently uncover corner cases in the
-+   kernel entry code.  In general, old kernels might cause these tests
-+   themselves to crash, but they should never crash the kernel.
-+3. Run the 'perf' tool in a mode (top or record) that generates many
-+   frequent performance monitoring non-maskable interrupts (see "NMI"
-+   in /proc/interrupts).  This exercises the NMI entry/exit code which
-+   is known to trigger bugs in code paths that did not expect to be
-+   interrupted, including nested NMIs.  Using "-c" boosts the rate of
-+   NMIs, and using two -c with separate counters encourages nested NMIs
-+   and less deterministic behavior.
-+
-+      while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
-+
-+4. Launch a KVM virtual machine.
-+5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
-+   This has been a lightly-tested code path and needs extra scrutiny.
-+
-+Debugging
-+=========
-+
-+Bugs in PTI cause a few different signatures of crashes
-+that are worth noting here.
-+
-+ * Failures of the selftests/x86 code.  Usually a bug in one of the
-+   more obscure corners of entry_64.S
-+ * Crashes in early boot, especially around CPU bringup.  Bugs
-+   in the trampoline code or mappings cause these.
-+ * Crashes at the first interrupt.  Caused by bugs in entry_64.S,
-+   like screwing up a page table switch.  Also caused by
-+   incorrectly mapping the IRQ handler entry code.
-+ * Crashes at the first NMI.  The NMI code is separate from main
-+   interrupt handlers and can have bugs that do not affect
-+   normal interrupts.  Also caused by incorrectly mapping NMI
-+   code.  NMIs that interrupt the entry code must be very
-+   careful and can be the cause of crashes that show up when
-+   running perf.
-+ * Kernel crashes at the first exit to userspace.  entry_64.S
-+   bugs, or failing to map some of the exit code.
-+ * Crashes at first interrupt that interrupts userspace. The paths
-+   in entry_64.S that return to userspace are sometimes separate
-+   from the ones that return to the kernel.
-+ * Double faults: overflowing the kernel stack because of page
-+   faults upon page faults.  Caused by touching non-pti-mapped
-+   data in the entry code, or forgetting to switch to kernel
-+   CR3 before calling into C functions which are not pti-mapped.
-+ * Userspace segfaults early in boot, sometimes manifesting
-+   as mount(8) failing to mount the rootfs.  These have
-+   tended to be TLB invalidation issues.  Usually invalidating
-+   the wrong PCID, or otherwise missing an invalidation.
-+
-+1. https://gruss.cc/files/kaiser.pdf
-+2. https://meltdownattack.com/meltdown.pdf
--- 
-2.14.2
-
diff --git a/patches/kernel/0251-x86-pti-Unbreak-EFI-old_memmap.patch b/patches/kernel/0251-x86-pti-Unbreak-EFI-old_memmap.patch

new file mode 100644 (file)

index 0000000..0aca0ec
--- /dev/null
+++ b/patches/kernel/0251-x86-pti-Unbreak-EFI-old_memmap.patch
@@ -0,0 +1,61 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Jiri Kosina <jkosina@suse.cz>
+Date: Fri, 5 Jan 2018 22:35:41 +0100
+Subject: [PATCH] x86/pti: Unbreak EFI old_memmap
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+EFI_OLD_MEMMAP's efi_call_phys_prolog() calls set_pgd() with swapper PGD that
+has _PAGE_USER set, which makes PTI set NX on it, and therefore EFI can't
+execute its code.
+
+Fix that by forcefully clearing _PAGE_NX from the PGD (this can't be done
+by the pgprot API).
+
+_PAGE_NX will be automatically reintroduced in efi_call_phys_epilog(), as
+_set_pgd() will again notice that this is _PAGE_USER, and set _PAGE_NX on
+it.
+
+Tested-by: Dimitri Sivanich <sivanich@hpe.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matt Fleming <matt@codeblueprint.co.uk>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-efi@vger.kernel.org
+Cc: stable@vger.kernel.org
+Link: http://lkml.kernel.org/r/nycvar.YFH.7.76.1801052215460.11852@cbobk.fhfr.pm
+(cherry picked from commit de53c3786a3ce162a1c815d0c04c766c23ec9c0a)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 31afacd8089f54061e718e5d491f11747755c503)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/platform/efi/efi_64.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
+index b104224d3d6c..987a38e82f73 100644
+--- a/arch/x86/platform/efi/efi_64.c
++++ b/arch/x86/platform/efi/efi_64.c
+@@ -133,7 +133,9 @@ pgd_t * __init efi_call_phys_prolog(void)
+                               pud[j] = *pud_offset(p4d_k, vaddr);
+                       }
+               }
++              pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
+       }
++
+ out:
+       __flush_tlb_all();
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0252-x86-Documentation-Add-PTI-description.patch b/patches/kernel/0252-x86-Documentation-Add-PTI-description.patch

new file mode 100644 (file)

index 0000000..6a38eaa
--- /dev/null
+++ b/patches/kernel/0252-x86-Documentation-Add-PTI-description.patch
@@ -0,0 +1,275 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Fri, 5 Jan 2018 09:44:36 -0800
+Subject: [PATCH] x86/Documentation: Add PTI description
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Add some details about how PTI works, what some of the downsides
+are, and how to debug it when things go wrong.
+
+Also document the kernel parameter: 'pti/nopti'.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Cc: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
+Cc: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
+Cc: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
+Cc: Richard Fellner <richard.fellner@student.tugraz.at>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Andi Lutomirsky <luto@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20180105174436.1BC6FA2B@viggo.jf.intel.com
+
+(cherry picked from commit 01c9b17bf673b05bb401b76ec763e9730ccf1376)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 1acf87c45b0170e717fc1b06a2d6fef47e07f79b)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/admin-guide/kernel-parameters.txt |  21 ++-
+ Documentation/x86/pti.txt                       | 186 ++++++++++++++++++++++++
+ 2 files changed, 200 insertions(+), 7 deletions(-)
+ create mode 100644 Documentation/x86/pti.txt
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index b4d2edf316db..1a6ebc6cdf26 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -2677,8 +2677,6 @@
+                       steal time is computed, but won't influence scheduler
+                       behaviour
+ 
+-      nopti           [X86-64] Disable kernel page table isolation
+-
+       nolapic         [X86-32,APIC] Do not enable or use the local APIC.
+ 
+       nolapic_timer   [X86-32,APIC] Do not use the local APIC timer.
+@@ -3247,11 +3245,20 @@
+       pt.             [PARIDE]
+                       See Documentation/blockdev/paride.txt.
+ 
+-      pti=            [X86_64]
+-                      Control user/kernel address space isolation:
+-                      on - enable
+-                      off - disable
+-                      auto - default setting
++      pti=            [X86_64] Control Page Table Isolation of user and
++                      kernel address spaces.  Disabling this feature
++                      removes hardening, but improves performance of
++                      system calls and interrupts.
++
++                      on   - unconditionally enable
++                      off  - unconditionally disable
++                      auto - kernel detects whether your CPU model is
++                             vulnerable to issues that PTI mitigates
++
++                      Not specifying this option is equivalent to pti=auto.
++
++      nopti           [X86_64]
++                      Equivalent to pti=off
+ 
+       pty.legacy_count=
+                       [KNL] Number of legacy pty's. Overwrites compiled-in
+diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
+new file mode 100644
+index 000000000000..d11eff61fc9a
+--- /dev/null
++++ b/Documentation/x86/pti.txt
+@@ -0,0 +1,186 @@
++Overview
++========
++
++Page Table Isolation (pti, previously known as KAISER[1]) is a
++countermeasure against attacks on the shared user/kernel address
++space such as the "Meltdown" approach[2].
++
++To mitigate this class of attacks, we create an independent set of
++page tables for use only when running userspace applications.  When
++the kernel is entered via syscalls, interrupts or exceptions, the
++page tables are switched to the full "kernel" copy.  When the system
++switches back to user mode, the user copy is used again.
++
++The userspace page tables contain only a minimal amount of kernel
++data: only what is needed to enter/exit the kernel such as the
++entry/exit functions themselves and the interrupt descriptor table
++(IDT).  There are a few strictly unnecessary things that get mapped
++such as the first C function when entering an interrupt (see
++comments in pti.c).
++
++This approach helps to ensure that side-channel attacks leveraging
++the paging structures do not function when PTI is enabled.  It can be
++enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
++Once enabled at compile-time, it can be disabled at boot with the
++'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
++
++Page Table Management
++=====================
++
++When PTI is enabled, the kernel manages two sets of page tables.
++The first set is very similar to the single set which is present in
++kernels without PTI.  This includes a complete mapping of userspace
++that the kernel can use for things like copy_to_user().
++
++Although _complete_, the user portion of the kernel page tables is
++crippled by setting the NX bit in the top level.  This ensures
++that any missed kernel->user CR3 switch will immediately crash
++userspace upon executing its first instruction.
++
++The userspace page tables map only the kernel data needed to enter
++and exit the kernel.  This data is entirely contained in the 'struct
++cpu_entry_area' structure which is placed in the fixmap which gives
++each CPU's copy of the area a compile-time-fixed virtual address.
++
++For new userspace mappings, the kernel makes the entries in its
++page tables like normal.  The only difference is when the kernel
++makes entries in the top (PGD) level.  In addition to setting the
++entry in the main kernel PGD, a copy of the entry is made in the
++userspace page tables' PGD.
++
++This sharing at the PGD level also inherently shares all the lower
++layers of the page tables.  This leaves a single, shared set of
++userspace page tables to manage.  One PTE to lock, one set of
++accessed bits, dirty bits, etc...
++
++Overhead
++========
++
++Protection against side-channel attacks is important.  But,
++this protection comes at a cost:
++
++1. Increased Memory Use
++  a. Each process now needs an order-1 PGD instead of order-0.
++     (Consumes an additional 4k per process).
++  b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
++     aligned so that it can be mapped by setting a single PMD
++     entry.  This consumes nearly 2MB of RAM once the kernel
++     is decompressed, but no space in the kernel image itself.
++
++2. Runtime Cost
++  a. CR3 manipulation to switch between the page table copies
++     must be done at interrupt, syscall, and exception entry
++     and exit (it can be skipped when the kernel is interrupted,
++     though.)  Moves to CR3 are on the order of a hundred
++     cycles, and are required at every entry and exit.
++  b. A "trampoline" must be used for SYSCALL entry.  This
++     trampoline depends on a smaller set of resources than the
++     non-PTI SYSCALL entry code, so requires mapping fewer
++     things into the userspace page tables.  The downside is
++     that stacks must be switched at entry time.
++  c. Global pages are disabled for all kernel structures not
++     mapped into both kernel and userspace page tables.  This
++     feature of the MMU allows different processes to share TLB
++     entries mapping the kernel.  Losing the feature means more
++     TLB misses after a context switch.  The actual loss of
++     performance is very small, however, never exceeding 1%.
++  d. Process Context IDentifiers (PCID) is a CPU feature that
++     allows us to skip flushing the entire TLB when switching page
++     tables by setting a special bit in CR3 when the page tables
++     are changed.  This makes switching the page tables (at context
++     switch, or kernel entry/exit) cheaper.  But, on systems with
++     PCID support, the context switch code must flush both the user
++     and kernel entries out of the TLB.  The user PCID TLB flush is
++     deferred until the exit to userspace, minimizing the cost.
++     See intel.com/sdm for the gory PCID/INVPCID details.
++  e. The userspace page tables must be populated for each new
++     process.  Even without PTI, the shared kernel mappings
++     are created by copying top-level (PGD) entries into each
++     new process.  But, with PTI, there are now *two* kernel
++     mappings: one in the kernel page tables that maps everything
++     and one for the entry/exit structures.  At fork(), we need to
++     copy both.
++  f. In addition to the fork()-time copying, there must also
++     be an update to the userspace PGD any time a set_pgd() is done
++     on a PGD used to map userspace.  This ensures that the kernel
++     and userspace copies always map the same userspace
++     memory.
++  g. On systems without PCID support, each CR3 write flushes
++     the entire TLB.  That means that each syscall, interrupt
++     or exception flushes the TLB.
++  h. INVPCID is a TLB-flushing instruction which allows flushing
++     of TLB entries for non-current PCIDs.  Some systems support
++     PCIDs, but do not support INVPCID.  On these systems, addresses
++     can only be flushed from the TLB for the current PCID.  When
++     flushing a kernel address, we need to flush all PCIDs, so a
++     single kernel address flush will require a TLB-flushing CR3
++     write upon the next use of every PCID.
++
++Possible Future Work
++====================
++1. We can be more careful about not actually writing to CR3
++   unless its value is actually changed.
++2. Allow PTI to be enabled/disabled at runtime in addition to the
++   boot-time switching.
++
++Testing
++========
++
++To test stability of PTI, the following test procedure is recommended,
++ideally doing all of these in parallel:
++
++1. Set CONFIG_DEBUG_ENTRY=y
++2. Run several copies of all of the tools/testing/selftests/x86/ tests
++   (excluding MPX and protection_keys) in a loop on multiple CPUs for
++   several minutes.  These tests frequently uncover corner cases in the
++   kernel entry code.  In general, old kernels might cause these tests
++   themselves to crash, but they should never crash the kernel.
++3. Run the 'perf' tool in a mode (top or record) that generates many
++   frequent performance monitoring non-maskable interrupts (see "NMI"
++   in /proc/interrupts).  This exercises the NMI entry/exit code which
++   is known to trigger bugs in code paths that did not expect to be
++   interrupted, including nested NMIs.  Using "-c" boosts the rate of
++   NMIs, and using two -c with separate counters encourages nested NMIs
++   and less deterministic behavior.
++
++      while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
++
++4. Launch a KVM virtual machine.
++5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
++   This has been a lightly-tested code path and needs extra scrutiny.
++
++Debugging
++=========
++
++Bugs in PTI cause a few different signatures of crashes
++that are worth noting here.
++
++ * Failures of the selftests/x86 code.  Usually a bug in one of the
++   more obscure corners of entry_64.S
++ * Crashes in early boot, especially around CPU bringup.  Bugs
++   in the trampoline code or mappings cause these.
++ * Crashes at the first interrupt.  Caused by bugs in entry_64.S,
++   like screwing up a page table switch.  Also caused by
++   incorrectly mapping the IRQ handler entry code.
++ * Crashes at the first NMI.  The NMI code is separate from main
++   interrupt handlers and can have bugs that do not affect
++   normal interrupts.  Also caused by incorrectly mapping NMI
++   code.  NMIs that interrupt the entry code must be very
++   careful and can be the cause of crashes that show up when
++   running perf.
++ * Kernel crashes at the first exit to userspace.  entry_64.S
++   bugs, or failing to map some of the exit code.
++ * Crashes at first interrupt that interrupts userspace. The paths
++   in entry_64.S that return to userspace are sometimes separate
++   from the ones that return to the kernel.
++ * Double faults: overflowing the kernel stack because of page
++   faults upon page faults.  Caused by touching non-pti-mapped
++   data in the entry code, or forgetting to switch to kernel
++   CR3 before calling into C functions which are not pti-mapped.
++ * Userspace segfaults early in boot, sometimes manifesting
++   as mount(8) failing to mount the rootfs.  These have
++   tended to be TLB invalidation issues.  Usually invalidating
++   the wrong PCID, or otherwise missing an invalidation.
++
++1. https://gruss.cc/files/kaiser.pdf
++2. https://meltdownattack.com/meltdown.pdf
+-- 
+2.14.2
+
diff --git a/patches/kernel/0252-x86-cpufeatures-Add-X86_BUG_SPECTRE_V-12.patch b/patches/kernel/0252-x86-cpufeatures-Add-X86_BUG_SPECTRE_V-12.patch

deleted file mode 100644 (file)

index 0e9d104..0000000
--- a/patches/kernel/0252-x86-cpufeatures-Add-X86_BUG_SPECTRE_V-12.patch
+++ /dev/null
@@ -1,68 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: David Woodhouse <dwmw@amazon.co.uk>
-Date: Sat, 6 Jan 2018 11:49:23 +0000
-Subject: [PATCH] x86/cpufeatures: Add X86_BUG_SPECTRE_V[12]
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-Add the bug bits for spectre v1/2 and force them unconditionally for all
-cpus.
-
-Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: gnomes@lxorguk.ukuu.org.uk
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Andi Kleen <ak@linux.intel.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Jiri Kosina <jikos@kernel.org>
-Cc: Andy Lutomirski <luto@amacapital.net>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Kees Cook <keescook@google.com>
-Cc: Tim Chen <tim.c.chen@linux.intel.com>
-Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
-Cc: Paul Turner <pjt@google.com>
-Cc: stable@vger.kernel.org
-Link: https://lkml.kernel.org/r/1515239374-23361-2-git-send-email-dwmw@amazon.co.uk
-(cherry picked from commit 99c6fa2511d8a683e61468be91b83f85452115fa)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit de861dbf4587b9dac9a1978e6349199755e8c1b1)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/cpufeatures.h | 2 ++
- arch/x86/kernel/cpu/common.c       | 3 +++
- 2 files changed, 5 insertions(+)
-
-diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
-index b7900d26066c..3928050b51b0 100644
---- a/arch/x86/include/asm/cpufeatures.h
-+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -341,5 +341,7 @@
- #define X86_BUG_MONITOR                       X86_BUG(12) /* IPI required to wake up remote CPU */
- #define X86_BUG_AMD_E400              X86_BUG(13) /* CPU is among the affected by Erratum 400 */
- #define X86_BUG_CPU_MELTDOWN          X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
-+#define X86_BUG_SPECTRE_V1            X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
-+#define X86_BUG_SPECTRE_V2            X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
- 
- #endif /* _ASM_X86_CPUFEATURES_H */
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
-index 142ab555dafa..01abbf69d522 100644
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -902,6 +902,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
-       if (c->x86_vendor != X86_VENDOR_AMD)
-               setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
- 
-+      setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
-+      setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
-+
-       fpu__init_system(c);
- }
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0253-x86-cpufeatures-Add-X86_BUG_SPECTRE_V-12.patch b/patches/kernel/0253-x86-cpufeatures-Add-X86_BUG_SPECTRE_V-12.patch

new file mode 100644 (file)

index 0000000..0e9d104
--- /dev/null
+++ b/patches/kernel/0253-x86-cpufeatures-Add-X86_BUG_SPECTRE_V-12.patch
@@ -0,0 +1,68 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Sat, 6 Jan 2018 11:49:23 +0000
+Subject: [PATCH] x86/cpufeatures: Add X86_BUG_SPECTRE_V[12]
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+Add the bug bits for spectre v1/2 and force them unconditionally for all
+cpus.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/1515239374-23361-2-git-send-email-dwmw@amazon.co.uk
+(cherry picked from commit 99c6fa2511d8a683e61468be91b83f85452115fa)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit de861dbf4587b9dac9a1978e6349199755e8c1b1)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/cpufeatures.h | 2 ++
+ arch/x86/kernel/cpu/common.c       | 3 +++
+ 2 files changed, 5 insertions(+)
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index b7900d26066c..3928050b51b0 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -341,5 +341,7 @@
+ #define X86_BUG_MONITOR                       X86_BUG(12) /* IPI required to wake up remote CPU */
+ #define X86_BUG_AMD_E400              X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+ #define X86_BUG_CPU_MELTDOWN          X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
++#define X86_BUG_SPECTRE_V1            X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
++#define X86_BUG_SPECTRE_V2            X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+ 
+ #endif /* _ASM_X86_CPUFEATURES_H */
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 142ab555dafa..01abbf69d522 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -902,6 +902,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
+       if (c->x86_vendor != X86_VENDOR_AMD)
+               setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ 
++      setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
++      setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
++
+       fpu__init_system(c);
+ }
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0253-x86-tboot-Unbreak-tboot-with-PTI-enabled.patch b/patches/kernel/0253-x86-tboot-Unbreak-tboot-with-PTI-enabled.patch

deleted file mode 100644 (file)

index aa0048e..0000000
--- a/patches/kernel/0253-x86-tboot-Unbreak-tboot-with-PTI-enabled.patch
+++ /dev/null
@@ -1,58 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Sat, 6 Jan 2018 18:41:14 +0100
-Subject: [PATCH] x86/tboot: Unbreak tboot with PTI enabled
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This is another case similar to what EFI does: create a new set of
-page tables, map some code at a low address, and jump to it.  PTI
-mistakes this low address for userspace and mistakenly marks it
-non-executable in an effort to make it unusable for userspace.
-
-Undo the poison to allow execution.
-
-Fixes: 385ce0ea4c07 ("x86/mm/pti: Add Kconfig")
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
-Cc: Tim Chen <tim.c.chen@linux.intel.com>
-Cc: Jon Masters <jcm@redhat.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Andi Kleen <andi@firstfloor.org>
-Cc: Jeff Law <law@redhat.com>
-Cc: Paolo Bonzini <pbonzini@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
-Cc: David" <dwmw@amazon.co.uk>
-Cc: Nick Clifton <nickc@redhat.com>
-Cc: stable@vger.kernel.org
-Link: https://lkml.kernel.org/r/20180108102805.GK25546@redhat.com
-(cherry picked from commit 262b6b30087246abf09d6275eb0c0dc421bcbe38)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit f03e9108405491791f0b883a2d95e2620ddfce64)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/tboot.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
-index a4eb27918ceb..75869a4b6c41 100644
---- a/arch/x86/kernel/tboot.c
-+++ b/arch/x86/kernel/tboot.c
-@@ -127,6 +127,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
-       p4d = p4d_alloc(&tboot_mm, pgd, vaddr);
-       if (!p4d)
-               return -1;
-+      pgd->pgd &= ~_PAGE_NX;
-       pud = pud_alloc(&tboot_mm, p4d, vaddr);
-       if (!pud)
-               return -1;
--- 
-2.14.2
-
diff --git a/patches/kernel/0254-x86-mm-pti-Remove-dead-logic-in-pti_user_pagetable_w.patch b/patches/kernel/0254-x86-mm-pti-Remove-dead-logic-in-pti_user_pagetable_w.patch

deleted file mode 100644 (file)

index 8056750..0000000
--- a/patches/kernel/0254-x86-mm-pti-Remove-dead-logic-in-pti_user_pagetable_w.patch
+++ /dev/null
@@ -1,151 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Jike Song <albcamus@gmail.com>
-Date: Tue, 9 Jan 2018 00:03:41 +0800
-Subject: [PATCH] x86/mm/pti: Remove dead logic in pti_user_pagetable_walk*()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The following code contains dead logic:
-
- 162 if (pgd_none(*pgd)) {
- 163         unsigned long new_p4d_page = __get_free_page(gfp);
- 164         if (!new_p4d_page)
- 165                 return NULL;
- 166
- 167         if (pgd_none(*pgd)) {
- 168                 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
- 169                 new_p4d_page = 0;
- 170         }
- 171         if (new_p4d_page)
- 172                 free_page(new_p4d_page);
- 173 }
-
-There can't be any difference between two pgd_none(*pgd) at L162 and L167,
-so it's always false at L171.
-
-Dave Hansen explained:
-
- Yes, the double-test was part of an optimization where we attempted to
- avoid using a global spinlock in the fork() path.  We would check for
- unallocated mid-level page tables without the lock.  The lock was only
- taken when we needed to *make* an entry to avoid collisions.
-
- Now that it is all single-threaded, there is no chance of a collision,
- no need for a lock, and no need for the re-check.
-
-As all these functions are only called during init, mark them __init as
-well.
-
-Fixes: 03f4424f348e ("x86/mm/pti: Add functions to clone kernel PMDs")
-Signed-off-by: Jike Song <albcamus@gmail.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
-Cc: Andi Kleen <ak@linux.intel.com>
-Cc: Tom Lendacky <thomas.lendacky@amd.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Tim Chen <tim.c.chen@linux.intel.com>
-Cc: Jiri Koshina <jikos@kernel.org>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Kees Cook <keescook@google.com>
-Cc: Andi Lutomirski <luto@amacapital.net>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Greg KH <gregkh@linux-foundation.org>
-Cc: David Woodhouse <dwmw@amazon.co.uk>
-Cc: Paul Turner <pjt@google.com>
-Cc: stable@vger.kernel.org
-Link: https://lkml.kernel.org/r/20180108160341.3461-1-albcamus@gmail.com
-
-(cherry picked from commit 8d56eff266f3e41a6c39926269c4c3f58f881a8e)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit de8ab6bea570e70d1478af2c1667714bc900ae70)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/pti.c | 32 ++++++--------------------------
- 1 file changed, 6 insertions(+), 26 deletions(-)
-
-diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
-index 43d4a4a29037..ce38f165489b 100644
---- a/arch/x86/mm/pti.c
-+++ b/arch/x86/mm/pti.c
-@@ -149,7 +149,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
-  *
-  * Returns a pointer to a P4D on success, or NULL on failure.
-  */
--static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
-+static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
- {
-       pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
-       gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
-@@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
-               if (!new_p4d_page)
-                       return NULL;
- 
--              if (pgd_none(*pgd)) {
--                      set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
--                      new_p4d_page = 0;
--              }
--              if (new_p4d_page)
--                      free_page(new_p4d_page);
-+              set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
-       }
-       BUILD_BUG_ON(pgd_large(*pgd) != 0);
- 
-@@ -182,7 +177,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
-  *
-  * Returns a pointer to a PMD on success, or NULL on failure.
-  */
--static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
-+static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
- {
-       gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
-       p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
-@@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
-               if (!new_pud_page)
-                       return NULL;
- 
--              if (p4d_none(*p4d)) {
--                      set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
--                      new_pud_page = 0;
--              }
--              if (new_pud_page)
--                      free_page(new_pud_page);
-+              set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
-       }
- 
-       pud = pud_offset(p4d, address);
-@@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
-               if (!new_pmd_page)
-                       return NULL;
- 
--              if (pud_none(*pud)) {
--                      set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
--                      new_pmd_page = 0;
--              }
--              if (new_pmd_page)
--                      free_page(new_pmd_page);
-+              set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
-       }
- 
-       return pmd_offset(pud, address);
-@@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
-               if (!new_pte_page)
-                       return NULL;
- 
--              if (pmd_none(*pmd)) {
--                      set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
--                      new_pte_page = 0;
--              }
--              if (new_pte_page)
--                      free_page(new_pte_page);
-+              set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
-       }
- 
-       pte = pte_offset_kernel(pmd, address);
--- 
-2.14.2
-
diff --git a/patches/kernel/0254-x86-tboot-Unbreak-tboot-with-PTI-enabled.patch b/patches/kernel/0254-x86-tboot-Unbreak-tboot-with-PTI-enabled.patch

new file mode 100644 (file)

index 0000000..aa0048e
--- /dev/null
+++ b/patches/kernel/0254-x86-tboot-Unbreak-tboot-with-PTI-enabled.patch
@@ -0,0 +1,58 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Sat, 6 Jan 2018 18:41:14 +0100
+Subject: [PATCH] x86/tboot: Unbreak tboot with PTI enabled
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This is another case similar to what EFI does: create a new set of
+page tables, map some code at a low address, and jump to it.  PTI
+mistakes this low address for userspace and mistakenly marks it
+non-executable in an effort to make it unusable for userspace.
+
+Undo the poison to allow execution.
+
+Fixes: 385ce0ea4c07 ("x86/mm/pti: Add Kconfig")
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Jon Masters <jcm@redhat.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Andi Kleen <andi@firstfloor.org>
+Cc: Jeff Law <law@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Nick Clifton <nickc@redhat.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20180108102805.GK25546@redhat.com
+(cherry picked from commit 262b6b30087246abf09d6275eb0c0dc421bcbe38)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit f03e9108405491791f0b883a2d95e2620ddfce64)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/tboot.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
+index a4eb27918ceb..75869a4b6c41 100644
+--- a/arch/x86/kernel/tboot.c
++++ b/arch/x86/kernel/tboot.c
+@@ -127,6 +127,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
+       p4d = p4d_alloc(&tboot_mm, pgd, vaddr);
+       if (!p4d)
+               return -1;
++      pgd->pgd &= ~_PAGE_NX;
+       pud = pud_alloc(&tboot_mm, p4d, vaddr);
+       if (!pud)
+               return -1;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0255-x86-cpu-AMD-Make-LFENCE-a-serializing-instruction.patch b/patches/kernel/0255-x86-cpu-AMD-Make-LFENCE-a-serializing-instruction.patch

deleted file mode 100644 (file)

index 65d8af4..0000000
--- a/patches/kernel/0255-x86-cpu-AMD-Make-LFENCE-a-serializing-instruction.patch
+++ /dev/null
@@ -1,77 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tom Lendacky <thomas.lendacky@amd.com>
-Date: Mon, 8 Jan 2018 16:09:21 -0600
-Subject: [PATCH] x86/cpu/AMD: Make LFENCE a serializing instruction
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-To aid in speculation control, make LFENCE a serializing instruction
-since it has less overhead than MFENCE.  This is done by setting bit 1
-of MSR 0xc0011029 (DE_CFG).  Some families that support LFENCE do not
-have this MSR.  For these families, the LFENCE instruction is already
-serializing.
-
-Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Tim Chen <tim.c.chen@linux.intel.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dan Williams <dan.j.williams@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
-Cc: David Woodhouse <dwmw@amazon.co.uk>
-Cc: Paul Turner <pjt@google.com>
-Link: https://lkml.kernel.org/r/20180108220921.12580.71694.stgit@tlendack-t1.amdoffice.net
-
-(cherry picked from commit e4d0e84e490790798691aaa0f2e598637f1867ec)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit bde943193168fe9a3814badaa0cae3422029dce5)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/msr-index.h |  2 ++
- arch/x86/kernel/cpu/amd.c        | 10 ++++++++++
- 2 files changed, 12 insertions(+)
-
-diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
-index 5573c75f8e4c..25147df4acfc 100644
---- a/arch/x86/include/asm/msr-index.h
-+++ b/arch/x86/include/asm/msr-index.h
-@@ -351,6 +351,8 @@
- #define FAM10H_MMIO_CONF_BASE_MASK    0xfffffffULL
- #define FAM10H_MMIO_CONF_BASE_SHIFT   20
- #define MSR_FAM10H_NODE_ID            0xc001100c
-+#define MSR_F10H_DECFG                        0xc0011029
-+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT   1
- 
- /* K8 MSRs */
- #define MSR_K8_TOP_MEM1                       0xc001001a
-diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
-index 2a5328cc03a6..c9a4e4db7860 100644
---- a/arch/x86/kernel/cpu/amd.c
-+++ b/arch/x86/kernel/cpu/amd.c
-@@ -785,6 +785,16 @@ static void init_amd(struct cpuinfo_x86 *c)
-               set_cpu_cap(c, X86_FEATURE_K8);
- 
-       if (cpu_has(c, X86_FEATURE_XMM2)) {
-+              /*
-+               * A serializing LFENCE has less overhead than MFENCE, so
-+               * use it for execution serialization.  On families which
-+               * don't have that MSR, LFENCE is already serializing.
-+               * msr_set_bit() uses the safe accessors, too, even if the MSR
-+               * is not present.
-+               */
-+              msr_set_bit(MSR_F10H_DECFG,
-+                          MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
-+
-               /* MFENCE stops RDTSC speculation */
-               set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
-       }
--- 
-2.14.2
-
diff --git a/patches/kernel/0255-x86-mm-pti-Remove-dead-logic-in-pti_user_pagetable_w.patch b/patches/kernel/0255-x86-mm-pti-Remove-dead-logic-in-pti_user_pagetable_w.patch

new file mode 100644 (file)

index 0000000..8056750
--- /dev/null
+++ b/patches/kernel/0255-x86-mm-pti-Remove-dead-logic-in-pti_user_pagetable_w.patch
@@ -0,0 +1,151 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Jike Song <albcamus@gmail.com>
+Date: Tue, 9 Jan 2018 00:03:41 +0800
+Subject: [PATCH] x86/mm/pti: Remove dead logic in pti_user_pagetable_walk*()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The following code contains dead logic:
+
+ 162 if (pgd_none(*pgd)) {
+ 163         unsigned long new_p4d_page = __get_free_page(gfp);
+ 164         if (!new_p4d_page)
+ 165                 return NULL;
+ 166
+ 167         if (pgd_none(*pgd)) {
+ 168                 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
+ 169                 new_p4d_page = 0;
+ 170         }
+ 171         if (new_p4d_page)
+ 172                 free_page(new_p4d_page);
+ 173 }
+
+There can't be any difference between two pgd_none(*pgd) at L162 and L167,
+so it's always false at L171.
+
+Dave Hansen explained:
+
+ Yes, the double-test was part of an optimization where we attempted to
+ avoid using a global spinlock in the fork() path.  We would check for
+ unallocated mid-level page tables without the lock.  The lock was only
+ taken when we needed to *make* an entry to avoid collisions.
+
+ Now that it is all single-threaded, there is no chance of a collision,
+ no need for a lock, and no need for the re-check.
+
+As all these functions are only called during init, mark them __init as
+well.
+
+Fixes: 03f4424f348e ("x86/mm/pti: Add functions to clone kernel PMDs")
+Signed-off-by: Jike Song <albcamus@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Kees Cook <keescook@google.com>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg KH <gregkh@linux-foundation.org>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Paul Turner <pjt@google.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20180108160341.3461-1-albcamus@gmail.com
+
+(cherry picked from commit 8d56eff266f3e41a6c39926269c4c3f58f881a8e)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit de8ab6bea570e70d1478af2c1667714bc900ae70)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/pti.c | 32 ++++++--------------------------
+ 1 file changed, 6 insertions(+), 26 deletions(-)
+
+diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
+index 43d4a4a29037..ce38f165489b 100644
+--- a/arch/x86/mm/pti.c
++++ b/arch/x86/mm/pti.c
+@@ -149,7 +149,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+  *
+  * Returns a pointer to a P4D on success, or NULL on failure.
+  */
+-static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
++static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+ {
+       pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
+       gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+@@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+               if (!new_p4d_page)
+                       return NULL;
+ 
+-              if (pgd_none(*pgd)) {
+-                      set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
+-                      new_p4d_page = 0;
+-              }
+-              if (new_p4d_page)
+-                      free_page(new_p4d_page);
++              set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
+       }
+       BUILD_BUG_ON(pgd_large(*pgd) != 0);
+ 
+@@ -182,7 +177,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+  *
+  * Returns a pointer to a PMD on success, or NULL on failure.
+  */
+-static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
++static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+ {
+       gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+       p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
+@@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+               if (!new_pud_page)
+                       return NULL;
+ 
+-              if (p4d_none(*p4d)) {
+-                      set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
+-                      new_pud_page = 0;
+-              }
+-              if (new_pud_page)
+-                      free_page(new_pud_page);
++              set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
+       }
+ 
+       pud = pud_offset(p4d, address);
+@@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+               if (!new_pmd_page)
+                       return NULL;
+ 
+-              if (pud_none(*pud)) {
+-                      set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+-                      new_pmd_page = 0;
+-              }
+-              if (new_pmd_page)
+-                      free_page(new_pmd_page);
++              set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+       }
+ 
+       return pmd_offset(pud, address);
+@@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
+               if (!new_pte_page)
+                       return NULL;
+ 
+-              if (pmd_none(*pmd)) {
+-                      set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+-                      new_pte_page = 0;
+-              }
+-              if (new_pte_page)
+-                      free_page(new_pte_page);
++              set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+       }
+ 
+       pte = pte_offset_kernel(pmd, address);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0256-x86-cpu-AMD-Make-LFENCE-a-serializing-instruction.patch b/patches/kernel/0256-x86-cpu-AMD-Make-LFENCE-a-serializing-instruction.patch

new file mode 100644 (file)

index 0000000..65d8af4
--- /dev/null
+++ b/patches/kernel/0256-x86-cpu-AMD-Make-LFENCE-a-serializing-instruction.patch
@@ -0,0 +1,77 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 8 Jan 2018 16:09:21 -0600
+Subject: [PATCH] x86/cpu/AMD: Make LFENCE a serializing instruction
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+To aid in speculation control, make LFENCE a serializing instruction
+since it has less overhead than MFENCE.  This is done by setting bit 1
+of MSR 0xc0011029 (DE_CFG).  Some families that support LFENCE do not
+have this MSR.  For these families, the LFENCE instruction is already
+serializing.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/20180108220921.12580.71694.stgit@tlendack-t1.amdoffice.net
+
+(cherry picked from commit e4d0e84e490790798691aaa0f2e598637f1867ec)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit bde943193168fe9a3814badaa0cae3422029dce5)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/msr-index.h |  2 ++
+ arch/x86/kernel/cpu/amd.c        | 10 ++++++++++
+ 2 files changed, 12 insertions(+)
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index 5573c75f8e4c..25147df4acfc 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -351,6 +351,8 @@
+ #define FAM10H_MMIO_CONF_BASE_MASK    0xfffffffULL
+ #define FAM10H_MMIO_CONF_BASE_SHIFT   20
+ #define MSR_FAM10H_NODE_ID            0xc001100c
++#define MSR_F10H_DECFG                        0xc0011029
++#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT   1
+ 
+ /* K8 MSRs */
+ #define MSR_K8_TOP_MEM1                       0xc001001a
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index 2a5328cc03a6..c9a4e4db7860 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -785,6 +785,16 @@ static void init_amd(struct cpuinfo_x86 *c)
+               set_cpu_cap(c, X86_FEATURE_K8);
+ 
+       if (cpu_has(c, X86_FEATURE_XMM2)) {
++              /*
++               * A serializing LFENCE has less overhead than MFENCE, so
++               * use it for execution serialization.  On families which
++               * don't have that MSR, LFENCE is already serializing.
++               * msr_set_bit() uses the safe accessors, too, even if the MSR
++               * is not present.
++               */
++              msr_set_bit(MSR_F10H_DECFG,
++                          MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
++
+               /* MFENCE stops RDTSC speculation */
+               set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+       }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0256-x86-cpu-AMD-Use-LFENCE_RDTSC-in-preference-to-MFENCE.patch b/patches/kernel/0256-x86-cpu-AMD-Use-LFENCE_RDTSC-in-preference-to-MFENCE.patch

deleted file mode 100644 (file)

index d2324ef..0000000
--- a/patches/kernel/0256-x86-cpu-AMD-Use-LFENCE_RDTSC-in-preference-to-MFENCE.patch
+++ /dev/null
@@ -1,92 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tom Lendacky <thomas.lendacky@amd.com>
-Date: Mon, 8 Jan 2018 16:09:32 -0600
-Subject: [PATCH] x86/cpu/AMD: Use LFENCE_RDTSC in preference to MFENCE_RDTSC
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-With LFENCE now a serializing instruction, use LFENCE_RDTSC in preference
-to MFENCE_RDTSC.  However, since the kernel could be running under a
-hypervisor that does not support writing that MSR, read the MSR back and
-verify that the bit has been set successfully.  If the MSR can be read
-and the bit is set, then set the LFENCE_RDTSC feature, otherwise set the
-MFENCE_RDTSC feature.
-
-Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Tim Chen <tim.c.chen@linux.intel.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Dan Williams <dan.j.williams@intel.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
-Cc: David Woodhouse <dwmw@amazon.co.uk>
-Cc: Paul Turner <pjt@google.com>
-Link: https://lkml.kernel.org/r/20180108220932.12580.52458.stgit@tlendack-t1.amdoffice.net
-
-(cherry picked from commit 9c6a73c75864ad9fa49e5fa6513e4c4071c0e29f)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit dc39f26bf11d270cb4cfd251919afb16d98d6c2b)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/msr-index.h |  1 +
- arch/x86/kernel/cpu/amd.c        | 18 ++++++++++++++++--
- 2 files changed, 17 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
-index 25147df4acfc..db88b7f852b4 100644
---- a/arch/x86/include/asm/msr-index.h
-+++ b/arch/x86/include/asm/msr-index.h
-@@ -353,6 +353,7 @@
- #define MSR_FAM10H_NODE_ID            0xc001100c
- #define MSR_F10H_DECFG                        0xc0011029
- #define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT   1
-+#define MSR_F10H_DECFG_LFENCE_SERIALIZE               BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
- 
- /* K8 MSRs */
- #define MSR_K8_TOP_MEM1                       0xc001001a
-diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
-index c9a4e4db7860..99eef4a09fd9 100644
---- a/arch/x86/kernel/cpu/amd.c
-+++ b/arch/x86/kernel/cpu/amd.c
-@@ -785,6 +785,9 @@ static void init_amd(struct cpuinfo_x86 *c)
-               set_cpu_cap(c, X86_FEATURE_K8);
- 
-       if (cpu_has(c, X86_FEATURE_XMM2)) {
-+              unsigned long long val;
-+              int ret;
-+
-               /*
-                * A serializing LFENCE has less overhead than MFENCE, so
-                * use it for execution serialization.  On families which
-@@ -795,8 +798,19 @@ static void init_amd(struct cpuinfo_x86 *c)
-               msr_set_bit(MSR_F10H_DECFG,
-                           MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
- 
--              /* MFENCE stops RDTSC speculation */
--              set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
-+              /*
-+               * Verify that the MSR write was successful (could be running
-+               * under a hypervisor) and only then assume that LFENCE is
-+               * serializing.
-+               */
-+              ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
-+              if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
-+                      /* A serializing LFENCE stops RDTSC speculation */
-+                      set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
-+              } else {
-+                      /* MFENCE stops RDTSC speculation */
-+                      set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
-+              }
-       }
- 
-       /*
--- 
-2.14.2
-
diff --git a/patches/kernel/0257-x86-alternatives-Fix-optimize_nops-checking.patch b/patches/kernel/0257-x86-alternatives-Fix-optimize_nops-checking.patch

deleted file mode 100644 (file)

index 81b3d15..0000000
--- a/patches/kernel/0257-x86-alternatives-Fix-optimize_nops-checking.patch
+++ /dev/null
@@ -1,63 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Borislav Petkov <bp@suse.de>
-Date: Wed, 10 Jan 2018 12:28:16 +0100
-Subject: [PATCH] x86/alternatives: Fix optimize_nops() checking
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The alternatives code checks only the first byte whether it is a NOP, but
-with NOPs in front of the payload and having actual instructions after it
-breaks the "optimized' test.
-
-Make sure to scan all bytes before deciding to optimize the NOPs in there.
-
-Reported-by: David Woodhouse <dwmw2@infradead.org>
-Signed-off-by: Borislav Petkov <bp@suse.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Tom Lendacky <thomas.lendacky@amd.com>
-Cc: Andi Kleen <ak@linux.intel.com>
-Cc: Tim Chen <tim.c.chen@linux.intel.com>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Jiri Kosina <jikos@kernel.org>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Andi Kleen <andi@firstfloor.org>
-Cc: Andrew Lutomirski <luto@kernel.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
-Cc: Paul Turner <pjt@google.com>
-Link: https://lkml.kernel.org/r/20180110112815.mgciyf5acwacphkq@pd.tnic
-
-(cherry picked from commit 612e8e9350fd19cae6900cf36ea0c6892d1a0dca)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit dc241f68557ee1929a92b9ec6f7a1294bbbd4f00)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/alternative.c | 7 +++++--
- 1 file changed, 5 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
-index 32e14d137416..5dc05755a044 100644
---- a/arch/x86/kernel/alternative.c
-+++ b/arch/x86/kernel/alternative.c
-@@ -344,9 +344,12 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
- static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
- {
-       unsigned long flags;
-+      int i;
- 
--      if (instr[0] != 0x90)
--              return;
-+      for (i = 0; i < a->padlen; i++) {
-+              if (instr[i] != 0x90)
-+                      return;
-+      }
- 
-       local_irq_save(flags);
-       add_nops(instr + (a->instrlen - a->padlen), a->padlen);
--- 
-2.14.2
-
diff --git a/patches/kernel/0257-x86-cpu-AMD-Use-LFENCE_RDTSC-in-preference-to-MFENCE.patch b/patches/kernel/0257-x86-cpu-AMD-Use-LFENCE_RDTSC-in-preference-to-MFENCE.patch

new file mode 100644 (file)

index 0000000..d2324ef
--- /dev/null
+++ b/patches/kernel/0257-x86-cpu-AMD-Use-LFENCE_RDTSC-in-preference-to-MFENCE.patch
@@ -0,0 +1,92 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 8 Jan 2018 16:09:32 -0600
+Subject: [PATCH] x86/cpu/AMD: Use LFENCE_RDTSC in preference to MFENCE_RDTSC
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+With LFENCE now a serializing instruction, use LFENCE_RDTSC in preference
+to MFENCE_RDTSC.  However, since the kernel could be running under a
+hypervisor that does not support writing that MSR, read the MSR back and
+verify that the bit has been set successfully.  If the MSR can be read
+and the bit is set, then set the LFENCE_RDTSC feature, otherwise set the
+MFENCE_RDTSC feature.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/20180108220932.12580.52458.stgit@tlendack-t1.amdoffice.net
+
+(cherry picked from commit 9c6a73c75864ad9fa49e5fa6513e4c4071c0e29f)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit dc39f26bf11d270cb4cfd251919afb16d98d6c2b)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/msr-index.h |  1 +
+ arch/x86/kernel/cpu/amd.c        | 18 ++++++++++++++++--
+ 2 files changed, 17 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index 25147df4acfc..db88b7f852b4 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -353,6 +353,7 @@
+ #define MSR_FAM10H_NODE_ID            0xc001100c
+ #define MSR_F10H_DECFG                        0xc0011029
+ #define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT   1
++#define MSR_F10H_DECFG_LFENCE_SERIALIZE               BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
+ 
+ /* K8 MSRs */
+ #define MSR_K8_TOP_MEM1                       0xc001001a
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index c9a4e4db7860..99eef4a09fd9 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -785,6 +785,9 @@ static void init_amd(struct cpuinfo_x86 *c)
+               set_cpu_cap(c, X86_FEATURE_K8);
+ 
+       if (cpu_has(c, X86_FEATURE_XMM2)) {
++              unsigned long long val;
++              int ret;
++
+               /*
+                * A serializing LFENCE has less overhead than MFENCE, so
+                * use it for execution serialization.  On families which
+@@ -795,8 +798,19 @@ static void init_amd(struct cpuinfo_x86 *c)
+               msr_set_bit(MSR_F10H_DECFG,
+                           MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+ 
+-              /* MFENCE stops RDTSC speculation */
+-              set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
++              /*
++               * Verify that the MSR write was successful (could be running
++               * under a hypervisor) and only then assume that LFENCE is
++               * serializing.
++               */
++              ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
++              if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
++                      /* A serializing LFENCE stops RDTSC speculation */
++                      set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
++              } else {
++                      /* MFENCE stops RDTSC speculation */
++                      set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
++              }
+       }
+ 
+       /*
+-- 
+2.14.2
+
diff --git a/patches/kernel/0258-x86-alternatives-Fix-optimize_nops-checking.patch b/patches/kernel/0258-x86-alternatives-Fix-optimize_nops-checking.patch

new file mode 100644 (file)

index 0000000..81b3d15
--- /dev/null
+++ b/patches/kernel/0258-x86-alternatives-Fix-optimize_nops-checking.patch
@@ -0,0 +1,63 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Wed, 10 Jan 2018 12:28:16 +0100
+Subject: [PATCH] x86/alternatives: Fix optimize_nops() checking
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The alternatives code checks only the first byte whether it is a NOP, but
+with NOPs in front of the payload and having actual instructions after it
+breaks the "optimized" test.
+
+Make sure to scan all bytes before deciding to optimize the NOPs in there.
+
+Reported-by: David Woodhouse <dwmw2@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Andi Kleen <andi@firstfloor.org>
+Cc: Andrew Lutomirski <luto@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/20180110112815.mgciyf5acwacphkq@pd.tnic
+
+(cherry picked from commit 612e8e9350fd19cae6900cf36ea0c6892d1a0dca)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit dc241f68557ee1929a92b9ec6f7a1294bbbd4f00)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/alternative.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
+index 32e14d137416..5dc05755a044 100644
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -344,9 +344,12 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
+ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
+ {
+       unsigned long flags;
++      int i;
+ 
+-      if (instr[0] != 0x90)
+-              return;
++      for (i = 0; i < a->padlen; i++) {
++              if (instr[i] != 0x90)
++                      return;
++      }
+ 
+       local_irq_save(flags);
+       add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0258-x86-pti-Make-unpoison-of-pgd-for-trusted-boot-work-f.patch b/patches/kernel/0258-x86-pti-Make-unpoison-of-pgd-for-trusted-boot-work-f.patch

deleted file mode 100644 (file)

index 774cb5d..0000000
--- a/patches/kernel/0258-x86-pti-Make-unpoison-of-pgd-for-trusted-boot-work-f.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Wed, 10 Jan 2018 14:49:39 -0800
-Subject: [PATCH] x86/pti: Make unpoison of pgd for trusted boot work for real
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-The inital fix for trusted boot and PTI potentially misses the pgd clearing
-if pud_alloc() sets a PGD.  It probably works in *practice* because for two
-adjacent calls to map_tboot_page() that share a PGD entry, the first will
-clear NX, *then* allocate and set the PGD (without NX clear).  The second
-call will *not* allocate but will clear the NX bit.
-
-Defer the NX clearing to a point after it is known that all top-level
-allocations have occurred.  Add a comment to clarify why.
-
-[ tglx: Massaged changelog ]
-
-Fixes: 262b6b30087 ("x86/tboot: Unbreak tboot with PTI enabled")
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
-Cc: Jon Masters <jcm@redhat.com>
-Cc: "Tim Chen" <tim.c.chen@linux.intel.com>
-Cc: gnomes@lxorguk.ukuu.org.uk
-Cc: peterz@infradead.org
-Cc: ning.sun@intel.com
-Cc: tboot-devel@lists.sourceforge.net
-Cc: andi@firstfloor.org
-Cc: luto@kernel.org
-Cc: law@redhat.com
-Cc: pbonzini@redhat.com
-Cc: torvalds@linux-foundation.org
-Cc: gregkh@linux-foundation.org
-Cc: dwmw@amazon.co.uk
-Cc: nickc@redhat.com
-Cc: stable@vger.kernel.org
-Link: https://lkml.kernel.org/r/20180110224939.2695CD47@viggo.jf.intel.com
-(cherry picked from commit 8a931d1e24bacf01f00a35d43bfe7917256c5c49)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 9935124a5c771c004a578423275633232fb7a006)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/tboot.c | 12 +++++++++++-
- 1 file changed, 11 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
-index 75869a4b6c41..a2486f444073 100644
---- a/arch/x86/kernel/tboot.c
-+++ b/arch/x86/kernel/tboot.c
-@@ -127,7 +127,6 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
-       p4d = p4d_alloc(&tboot_mm, pgd, vaddr);
-       if (!p4d)
-               return -1;
--      pgd->pgd &= ~_PAGE_NX;
-       pud = pud_alloc(&tboot_mm, p4d, vaddr);
-       if (!pud)
-               return -1;
-@@ -139,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
-               return -1;
-       set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
-       pte_unmap(pte);
-+
-+      /*
-+       * PTI poisons low addresses in the kernel page tables in the
-+       * name of making them unusable for userspace.  To execute
-+       * code at such a low address, the poison must be cleared.
-+       *
-+       * Note: 'pgd' actually gets set in p4d_alloc() _or_
-+       * pud_alloc() depending on 4/5-level paging.
-+       */
-+      pgd->pgd &= ~_PAGE_NX;
-+
-       return 0;
- }
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0259-locking-barriers-introduce-new-memory-barrier-gmb.patch b/patches/kernel/0259-locking-barriers-introduce-new-memory-barrier-gmb.patch

deleted file mode 100644 (file)

index 1079661..0000000
--- a/patches/kernel/0259-locking-barriers-introduce-new-memory-barrier-gmb.patch
+++ /dev/null
@@ -1,62 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Elena Reshetova <elena.reshetova@intel.com>
-Date: Mon, 4 Sep 2017 13:11:43 +0300
-Subject: [PATCH] locking/barriers: introduce new memory barrier gmb()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-In constrast to existing mb() and rmb() barriers,
-gmb() barrier is arch-independent and can be used to
-implement any type of memory barrier.
-In x86 case, it is either lfence or mfence, based on
-processor type. ARM and others can define it according
-to their needs.
-
-Suggested-by: Arjan van de Ven <arjan@linux.intel.com>
-Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 6834bd7e6159da957a6c01deebf16132a694bc23)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/barrier.h | 3 +++
- include/asm-generic/barrier.h  | 4 ++++
- 2 files changed, 7 insertions(+)
-
-diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
-index bfb28caf97b1..aae78054cae2 100644
---- a/arch/x86/include/asm/barrier.h
-+++ b/arch/x86/include/asm/barrier.h
-@@ -23,6 +23,9 @@
- #define wmb() asm volatile("sfence" ::: "memory")
- #endif
- 
-+#define gmb() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
-+                                     "lfence", X86_FEATURE_LFENCE_RDTSC);
-+
- #ifdef CONFIG_X86_PPRO_FENCE
- #define dma_rmb()     rmb()
- #else
-diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
-index fe297b599b0a..0ee1345c9222 100644
---- a/include/asm-generic/barrier.h
-+++ b/include/asm-generic/barrier.h
-@@ -42,6 +42,10 @@
- #define wmb() mb()
- #endif
- 
-+#ifndef gmb
-+#define gmb() do { } while (0)
-+#endif
-+
- #ifndef dma_rmb
- #define dma_rmb()     rmb()
- #endif
--- 
-2.14.2
-
diff --git a/patches/kernel/0259-x86-pti-Make-unpoison-of-pgd-for-trusted-boot-work-f.patch b/patches/kernel/0259-x86-pti-Make-unpoison-of-pgd-for-trusted-boot-work-f.patch

new file mode 100644 (file)

index 0000000..774cb5d
--- /dev/null
+++ b/patches/kernel/0259-x86-pti-Make-unpoison-of-pgd-for-trusted-boot-work-f.patch
@@ -0,0 +1,83 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Wed, 10 Jan 2018 14:49:39 -0800
+Subject: [PATCH] x86/pti: Make unpoison of pgd for trusted boot work for real
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+The initial fix for trusted boot and PTI potentially misses the pgd clearing
+if pud_alloc() sets a PGD.  It probably works in *practice* because for two
+adjacent calls to map_tboot_page() that share a PGD entry, the first will
+clear NX, *then* allocate and set the PGD (without NX clear).  The second
+call will *not* allocate but will clear the NX bit.
+
+Defer the NX clearing to a point after it is known that all top-level
+allocations have occurred.  Add a comment to clarify why.
+
+[ tglx: Massaged changelog ]
+
+Fixes: 262b6b30087 ("x86/tboot: Unbreak tboot with PTI enabled")
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Jon Masters <jcm@redhat.com>
+Cc: "Tim Chen" <tim.c.chen@linux.intel.com>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: peterz@infradead.org
+Cc: ning.sun@intel.com
+Cc: tboot-devel@lists.sourceforge.net
+Cc: andi@firstfloor.org
+Cc: luto@kernel.org
+Cc: law@redhat.com
+Cc: pbonzini@redhat.com
+Cc: torvalds@linux-foundation.org
+Cc: gregkh@linux-foundation.org
+Cc: dwmw@amazon.co.uk
+Cc: nickc@redhat.com
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20180110224939.2695CD47@viggo.jf.intel.com
+(cherry picked from commit 8a931d1e24bacf01f00a35d43bfe7917256c5c49)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 9935124a5c771c004a578423275633232fb7a006)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/tboot.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
+index 75869a4b6c41..a2486f444073 100644
+--- a/arch/x86/kernel/tboot.c
++++ b/arch/x86/kernel/tboot.c
+@@ -127,7 +127,6 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
+       p4d = p4d_alloc(&tboot_mm, pgd, vaddr);
+       if (!p4d)
+               return -1;
+-      pgd->pgd &= ~_PAGE_NX;
+       pud = pud_alloc(&tboot_mm, p4d, vaddr);
+       if (!pud)
+               return -1;
+@@ -139,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
+               return -1;
+       set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
+       pte_unmap(pte);
++
++      /*
++       * PTI poisons low addresses in the kernel page tables in the
++       * name of making them unusable for userspace.  To execute
++       * code at such a low address, the poison must be cleared.
++       *
++       * Note: 'pgd' actually gets set in p4d_alloc() _or_
++       * pud_alloc() depending on 4/5-level paging.
++       */
++      pgd->pgd &= ~_PAGE_NX;
++
+       return 0;
+ }
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0260-bpf-prevent-speculative-execution-in-eBPF-interprete.patch b/patches/kernel/0260-bpf-prevent-speculative-execution-in-eBPF-interprete.patch

deleted file mode 100644 (file)

index 4758995..0000000
--- a/patches/kernel/0260-bpf-prevent-speculative-execution-in-eBPF-interprete.patch
+++ /dev/null
@@ -1,60 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Elena Reshetova <elena.reshetova@intel.com>
-Date: Mon, 4 Sep 2017 13:11:44 +0300
-Subject: [PATCH] bpf: prevent speculative execution in eBPF interpreter
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-This adds a generic memory barrier before LD_IMM_DW and
-LDX_MEM_B/H/W/DW eBPF instructions during eBPF program
-execution in order to prevent speculative execution on out
-of bound BFP_MAP array indexes. This way an arbitary kernel
-memory is not exposed through side channel attacks.
-
-For more details, please see this Google Project Zero report: tbd
-
-Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit dd13f73106c260dea7a689d33d1457639af820aa)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- kernel/bpf/core.c | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
-index 9a1bed1f3029..3f83c60e3e86 100644
---- a/kernel/bpf/core.c
-+++ b/kernel/bpf/core.c
-@@ -33,6 +33,7 @@
- #include <linux/rcupdate.h>
- 
- #include <asm/unaligned.h>
-+#include <asm/barrier.h>
- 
- /* Registers */
- #define BPF_R0        regs[BPF_REG_0]
-@@ -920,6 +921,7 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
-               DST = IMM;
-               CONT;
-       LD_IMM_DW:
-+              gmb();
-               DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
-               insn++;
-               CONT;
-@@ -1133,6 +1135,7 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
-               *(SIZE *)(unsigned long) (DST + insn->off) = IMM;       \
-               CONT;                                                   \
-       LDX_MEM_##SIZEOP:                                               \
-+              gmb();                                                  \
-               DST = *(SIZE *)(unsigned long) (SRC + insn->off);       \
-               CONT;
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0260-locking-barriers-introduce-new-memory-barrier-gmb.patch b/patches/kernel/0260-locking-barriers-introduce-new-memory-barrier-gmb.patch

new file mode 100644 (file)

index 0000000..1079661
--- /dev/null
+++ b/patches/kernel/0260-locking-barriers-introduce-new-memory-barrier-gmb.patch
@@ -0,0 +1,62 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:43 +0300
+Subject: [PATCH] locking/barriers: introduce new memory barrier gmb()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+In contrast to existing mb() and rmb() barriers,
+gmb() barrier is arch-independent and can be used to
+implement any type of memory barrier.
+In x86 case, it is either lfence or mfence, based on
+processor type. ARM and others can define it according
+to their needs.
+
+Suggested-by: Arjan van de Ven <arjan@linux.intel.com>
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 6834bd7e6159da957a6c01deebf16132a694bc23)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/barrier.h | 3 +++
+ include/asm-generic/barrier.h  | 4 ++++
+ 2 files changed, 7 insertions(+)
+
+diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
+index bfb28caf97b1..aae78054cae2 100644
+--- a/arch/x86/include/asm/barrier.h
++++ b/arch/x86/include/asm/barrier.h
+@@ -23,6 +23,9 @@
+ #define wmb() asm volatile("sfence" ::: "memory")
+ #endif
+ 
++#define gmb() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
++                                     "lfence", X86_FEATURE_LFENCE_RDTSC);
++
+ #ifdef CONFIG_X86_PPRO_FENCE
+ #define dma_rmb()     rmb()
+ #else
+diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
+index fe297b599b0a..0ee1345c9222 100644
+--- a/include/asm-generic/barrier.h
++++ b/include/asm-generic/barrier.h
+@@ -42,6 +42,10 @@
+ #define wmb() mb()
+ #endif
+ 
++#ifndef gmb
++#define gmb() do { } while (0)
++#endif
++
+ #ifndef dma_rmb
+ #define dma_rmb()     rmb()
+ #endif
+-- 
+2.14.2
+
diff --git a/patches/kernel/0261-bpf-prevent-speculative-execution-in-eBPF-interprete.patch b/patches/kernel/0261-bpf-prevent-speculative-execution-in-eBPF-interprete.patch

new file mode 100644 (file)

index 0000000..4758995
--- /dev/null
+++ b/patches/kernel/0261-bpf-prevent-speculative-execution-in-eBPF-interprete.patch
@@ -0,0 +1,60 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:44 +0300
+Subject: [PATCH] bpf: prevent speculative execution in eBPF interpreter
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+This adds a generic memory barrier before LD_IMM_DW and
+LDX_MEM_B/H/W/DW eBPF instructions during eBPF program
+execution in order to prevent speculative execution on out
+of bound BPF_MAP array indexes. This way arbitrary kernel
+memory is not exposed through side channel attacks.
+
+For more details, please see this Google Project Zero report: tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit dd13f73106c260dea7a689d33d1457639af820aa)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ kernel/bpf/core.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
+index 9a1bed1f3029..3f83c60e3e86 100644
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -33,6 +33,7 @@
+ #include <linux/rcupdate.h>
+ 
+ #include <asm/unaligned.h>
++#include <asm/barrier.h>
+ 
+ /* Registers */
+ #define BPF_R0        regs[BPF_REG_0]
+@@ -920,6 +921,7 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
+               DST = IMM;
+               CONT;
+       LD_IMM_DW:
++              gmb();
+               DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
+               insn++;
+               CONT;
+@@ -1133,6 +1135,7 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
+               *(SIZE *)(unsigned long) (DST + insn->off) = IMM;       \
+               CONT;                                                   \
+       LDX_MEM_##SIZEOP:                                               \
++              gmb();                                                  \
+               DST = *(SIZE *)(unsigned long) (SRC + insn->off);       \
+               CONT;
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0261-x86-bpf-jit-prevent-speculative-execution-when-JIT-i.patch b/patches/kernel/0261-x86-bpf-jit-prevent-speculative-execution-when-JIT-i.patch

deleted file mode 100644 (file)

index 5e11a7b..0000000
--- a/patches/kernel/0261-x86-bpf-jit-prevent-speculative-execution-when-JIT-i.patch
+++ /dev/null
@@ -1,93 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Elena Reshetova <elena.reshetova@intel.com>
-Date: Mon, 4 Sep 2017 13:11:45 +0300
-Subject: [PATCH] x86, bpf, jit: prevent speculative execution when JIT is
- enabled
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-When constant blinding is enabled (bpf_jit_harden = 1), this adds
-a generic memory barrier (lfence for intel, mfence for AMD) before
-emitting x86 jitted code for the BPF_ALU(64)_OR_X and BPF_ALU_LHS_X
-(for BPF_REG_AX register) eBPF instructions. This is needed in order
-to prevent speculative execution on out of bounds BPF_MAP array
-indexes when JIT is enabled. This way an arbitary kernel memory is
-not exposed through side-channel attacks.
-
-For more details, please see this Google Project Zero report: tbd
-
-Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit cf9676859a05d0d784067072e8121e63888bacc7)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/net/bpf_jit_comp.c | 33 ++++++++++++++++++++++++++++++++-
- 1 file changed, 32 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
-index 4d50ced94686..879dbfefb66d 100644
---- a/arch/x86/net/bpf_jit_comp.c
-+++ b/arch/x86/net/bpf_jit_comp.c
-@@ -107,6 +107,27 @@ static void bpf_flush_icache(void *start, void *end)
-       set_fs(old_fs);
- }
- 
-+static void emit_memory_barrier(u8 **pprog)
-+{
-+      u8 *prog = *pprog;
-+      int cnt = 0;
-+
-+      if (bpf_jit_blinding_enabled()) {
-+              if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
-+                      /* x86 LFENCE opcode 0F AE E8 */
-+                      EMIT3(0x0f, 0xae, 0xe8);
-+              else if (boot_cpu_has(X86_FEATURE_MFENCE_RDTSC))
-+                      /* AMD MFENCE opcode 0F AE F0 */
-+                      EMIT3(0x0f, 0xae, 0xf0);
-+              else
-+                      /* we should never end up here,
-+                       * but if we do, better not to emit anything*/
-+                      return;
-+      }
-+      *pprog = prog;
-+      return;
-+}
-+
- #define CHOOSE_LOAD_FUNC(K, func) \
-       ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
- 
-@@ -399,7 +420,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
-                       case BPF_ADD: b2 = 0x01; break;
-                       case BPF_SUB: b2 = 0x29; break;
-                       case BPF_AND: b2 = 0x21; break;
--                      case BPF_OR: b2 = 0x09; break;
-+                      case BPF_OR: b2 = 0x09; emit_memory_barrier(&prog); break;
-                       case BPF_XOR: b2 = 0x31; break;
-                       }
-                       if (BPF_CLASS(insn->code) == BPF_ALU64)
-@@ -646,6 +667,16 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
-               case BPF_ALU64 | BPF_RSH | BPF_X:
-               case BPF_ALU64 | BPF_ARSH | BPF_X:
- 
-+                      /* If blinding is enabled, each
-+                       * BPF_LD | BPF_IMM | BPF_DW instruction
-+                       * is converted to 4 eBPF instructions with
-+                       * BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32)
-+                       * always present(number 3). Detect such cases
-+                       * and insert memory barriers. */
-+                      if ((BPF_CLASS(insn->code) == BPF_ALU64)
-+                              && (BPF_OP(insn->code) == BPF_LSH)
-+                              && (src_reg == BPF_REG_AX))
-+                              emit_memory_barrier(&prog);
-                       /* check for bad case when dst_reg == rcx */
-                       if (dst_reg == BPF_REG_4) {
-                               /* mov r11, dst_reg */
--- 
-2.14.2
-
diff --git a/patches/kernel/0262-uvcvideo-prevent-speculative-execution.patch b/patches/kernel/0262-uvcvideo-prevent-speculative-execution.patch

deleted file mode 100644 (file)

index f4d5128..0000000
--- a/patches/kernel/0262-uvcvideo-prevent-speculative-execution.patch
+++ /dev/null
@@ -1,38 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Elena Reshetova <elena.reshetova@intel.com>
-Date: Mon, 4 Sep 2017 13:11:46 +0300
-Subject: [PATCH] uvcvideo: prevent speculative execution
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-real commit text tbd
-
-Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 65d4588b16395360695525add0ca79fa6ba04fa5)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- drivers/media/usb/uvc/uvc_v4l2.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c
-index 3e7e283a44a8..fcedd1798e9d 100644
---- a/drivers/media/usb/uvc/uvc_v4l2.c
-+++ b/drivers/media/usb/uvc/uvc_v4l2.c
-@@ -821,6 +821,7 @@ static int uvc_ioctl_enum_input(struct file *file, void *fh,
-               }
-               pin = iterm->id;
-       } else if (index < selector->bNrInPins) {
-+              gmb();
-               pin = selector->baSourceID[index];
-               list_for_each_entry(iterm, &chain->entities, chain) {
-                       if (!UVC_ENTITY_IS_ITERM(iterm))
--- 
-2.14.2
-
diff --git a/patches/kernel/0262-x86-bpf-jit-prevent-speculative-execution-when-JIT-i.patch b/patches/kernel/0262-x86-bpf-jit-prevent-speculative-execution-when-JIT-i.patch

new file mode 100644 (file)

index 0000000..5e11a7b
--- /dev/null
+++ b/patches/kernel/0262-x86-bpf-jit-prevent-speculative-execution-when-JIT-i.patch
@@ -0,0 +1,93 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:45 +0300
+Subject: [PATCH] x86, bpf, jit: prevent speculative execution when JIT is
+ enabled
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+When constant blinding is enabled (bpf_jit_harden = 1), this adds
+a generic memory barrier (lfence for intel, mfence for AMD) before
+emitting x86 jitted code for the BPF_ALU(64)_OR_X and BPF_ALU_LSH_X
+(for BPF_REG_AX register) eBPF instructions. This is needed in order
+to prevent speculative execution on out of bounds BPF_MAP array
+indexes when JIT is enabled. This way arbitrary kernel memory is
+not exposed through side-channel attacks.
+
+For more details, please see this Google Project Zero report: tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit cf9676859a05d0d784067072e8121e63888bacc7)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/net/bpf_jit_comp.c | 33 ++++++++++++++++++++++++++++++++-
+ 1 file changed, 32 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
+index 4d50ced94686..879dbfefb66d 100644
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -107,6 +107,27 @@ static void bpf_flush_icache(void *start, void *end)
+       set_fs(old_fs);
+ }
+ 
++static void emit_memory_barrier(u8 **pprog)
++{
++      u8 *prog = *pprog;
++      int cnt = 0;
++
++      if (bpf_jit_blinding_enabled()) {
++              if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
++                      /* LFENCE, encoded as the three bytes 0F AE E8 */
++                      EMIT3(0x0f, 0xae, 0xe8);
++              else if (boot_cpu_has(X86_FEATURE_MFENCE_RDTSC))
++                      /* AMD: MFENCE, encoded as the three bytes 0F AE F0 */
++                      EMIT3(0x0f, 0xae, 0xf0);
++              else
++                      /* Neither fence feature bit is set. Not expected, but
++                       * if it happens, leave *pprog untouched: emit nothing. */
++                      return;
++      }
++      *pprog = prog;
++      return;
++}
++
+ #define CHOOSE_LOAD_FUNC(K, func) \
+       ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
+ 
+@@ -399,7 +420,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
+                       case BPF_ADD: b2 = 0x01; break;
+                       case BPF_SUB: b2 = 0x29; break;
+                       case BPF_AND: b2 = 0x21; break;
+-                      case BPF_OR: b2 = 0x09; break;
++                      case BPF_OR: b2 = 0x09; emit_memory_barrier(&prog); break;
+                       case BPF_XOR: b2 = 0x31; break;
+                       }
+                       if (BPF_CLASS(insn->code) == BPF_ALU64)
+@@ -646,6 +667,16 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
+               case BPF_ALU64 | BPF_RSH | BPF_X:
+               case BPF_ALU64 | BPF_ARSH | BPF_X:
+ 
++                      /* If blinding is enabled, each
++                       * BPF_LD | BPF_IMM | BPF_DW instruction
++                       * is converted to 4 eBPF instructions with
++                       * BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32)
++                       * always present(number 3). Detect such cases
++                       * and insert memory barriers. */
++                      if ((BPF_CLASS(insn->code) == BPF_ALU64)
++                              && (BPF_OP(insn->code) == BPF_LSH)
++                              && (src_reg == BPF_REG_AX))
++                              emit_memory_barrier(&prog);
+                       /* check for bad case when dst_reg == rcx */
+                       if (dst_reg == BPF_REG_4) {
+                               /* mov r11, dst_reg */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0263-carl9170-prevent-speculative-execution.patch b/patches/kernel/0263-carl9170-prevent-speculative-execution.patch

deleted file mode 100644 (file)

index aa83932..0000000
--- a/patches/kernel/0263-carl9170-prevent-speculative-execution.patch
+++ /dev/null
@@ -1,38 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Elena Reshetova <elena.reshetova@intel.com>
-Date: Mon, 4 Sep 2017 13:11:47 +0300
-Subject: [PATCH] carl9170: prevent speculative execution
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Real commit text tbd
-
-Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit dc218eba4fe8241ab073be41a068f6796450c6d0)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- drivers/net/wireless/ath/carl9170/main.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c
-index 988c8857d78c..7e2c1c870a1d 100644
---- a/drivers/net/wireless/ath/carl9170/main.c
-+++ b/drivers/net/wireless/ath/carl9170/main.c
-@@ -1388,6 +1388,7 @@ static int carl9170_op_conf_tx(struct ieee80211_hw *hw,
- 
-       mutex_lock(&ar->mutex);
-       if (queue < ar->hw->queues) {
-+              gmb();
-               memcpy(&ar->edcf[ar9170_qmap[queue]], param, sizeof(*param));
-               ret = carl9170_set_qos(ar);
-       } else {
--- 
-2.14.2
-
diff --git a/patches/kernel/0263-uvcvideo-prevent-speculative-execution.patch b/patches/kernel/0263-uvcvideo-prevent-speculative-execution.patch

new file mode 100644 (file)

index 0000000..f4d5128
--- /dev/null
+++ b/patches/kernel/0263-uvcvideo-prevent-speculative-execution.patch
@@ -0,0 +1,38 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:46 +0300
+Subject: [PATCH] uvcvideo: prevent speculative execution
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 65d4588b16395360695525add0ca79fa6ba04fa5)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ drivers/media/usb/uvc/uvc_v4l2.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c
+index 3e7e283a44a8..fcedd1798e9d 100644
+--- a/drivers/media/usb/uvc/uvc_v4l2.c
++++ b/drivers/media/usb/uvc/uvc_v4l2.c
+@@ -821,6 +821,7 @@ static int uvc_ioctl_enum_input(struct file *file, void *fh,
+               }
+               pin = iterm->id;
+       } else if (index < selector->bNrInPins) {
++              gmb();
+               pin = selector->baSourceID[index];
+               list_for_each_entry(iterm, &chain->entities, chain) {
+                       if (!UVC_ENTITY_IS_ITERM(iterm))
+-- 
+2.14.2
+
diff --git a/patches/kernel/0264-carl9170-prevent-speculative-execution.patch b/patches/kernel/0264-carl9170-prevent-speculative-execution.patch

new file mode 100644 (file)

index 0000000..aa83932
--- /dev/null
+++ b/patches/kernel/0264-carl9170-prevent-speculative-execution.patch
@@ -0,0 +1,38 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:47 +0300
+Subject: [PATCH] carl9170: prevent speculative execution
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit dc218eba4fe8241ab073be41a068f6796450c6d0)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ drivers/net/wireless/ath/carl9170/main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c
+index 988c8857d78c..7e2c1c870a1d 100644
+--- a/drivers/net/wireless/ath/carl9170/main.c
++++ b/drivers/net/wireless/ath/carl9170/main.c
+@@ -1388,6 +1388,7 @@ static int carl9170_op_conf_tx(struct ieee80211_hw *hw,
+ 
+       mutex_lock(&ar->mutex);
+       if (queue < ar->hw->queues) {
++              gmb();
+               memcpy(&ar->edcf[ar9170_qmap[queue]], param, sizeof(*param));
+               ret = carl9170_set_qos(ar);
+       } else {
+-- 
+2.14.2
+
diff --git a/patches/kernel/0264-p54-prevent-speculative-execution.patch b/patches/kernel/0264-p54-prevent-speculative-execution.patch

deleted file mode 100644 (file)

index 3d9ba34..0000000
--- a/patches/kernel/0264-p54-prevent-speculative-execution.patch
+++ /dev/null
@@ -1,38 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Elena Reshetova <elena.reshetova@intel.com>
-Date: Mon, 4 Sep 2017 13:11:48 +0300
-Subject: [PATCH] p54: prevent speculative execution
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Real commit text tbd
-
-Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 57b537e161bb9d44475a05b2b12d64bfb50319d3)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- drivers/net/wireless/intersil/p54/main.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/drivers/net/wireless/intersil/p54/main.c b/drivers/net/wireless/intersil/p54/main.c
-index d5a3bf91a03e..7e6af1f67960 100644
---- a/drivers/net/wireless/intersil/p54/main.c
-+++ b/drivers/net/wireless/intersil/p54/main.c
-@@ -415,6 +415,7 @@ static int p54_conf_tx(struct ieee80211_hw *dev,
- 
-       mutex_lock(&priv->conf_mutex);
-       if (queue < dev->queues) {
-+              gmb();
-               P54_SET_QUEUE(priv->qos_params[queue], params->aifs,
-                       params->cw_min, params->cw_max, params->txop);
-               ret = p54_set_edcf(priv);
--- 
-2.14.2
-
diff --git a/patches/kernel/0265-p54-prevent-speculative-execution.patch b/patches/kernel/0265-p54-prevent-speculative-execution.patch

new file mode 100644 (file)

index 0000000..3d9ba34
--- /dev/null
+++ b/patches/kernel/0265-p54-prevent-speculative-execution.patch
@@ -0,0 +1,38 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:48 +0300
+Subject: [PATCH] p54: prevent speculative execution
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 57b537e161bb9d44475a05b2b12d64bfb50319d3)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ drivers/net/wireless/intersil/p54/main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/wireless/intersil/p54/main.c b/drivers/net/wireless/intersil/p54/main.c
+index d5a3bf91a03e..7e6af1f67960 100644
+--- a/drivers/net/wireless/intersil/p54/main.c
++++ b/drivers/net/wireless/intersil/p54/main.c
+@@ -415,6 +415,7 @@ static int p54_conf_tx(struct ieee80211_hw *dev,
+ 
+       mutex_lock(&priv->conf_mutex);
+       if (queue < dev->queues) {
++              gmb();
+               P54_SET_QUEUE(priv->qos_params[queue], params->aifs,
+                       params->cw_min, params->cw_max, params->txop);
+               ret = p54_set_edcf(priv);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0265-qla2xxx-prevent-speculative-execution.patch b/patches/kernel/0265-qla2xxx-prevent-speculative-execution.patch

deleted file mode 100644 (file)

index 527c7f9..0000000
--- a/patches/kernel/0265-qla2xxx-prevent-speculative-execution.patch
+++ /dev/null
@@ -1,60 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Elena Reshetova <elena.reshetova@intel.com>
-Date: Mon, 4 Sep 2017 13:11:49 +0300
-Subject: [PATCH] qla2xxx: prevent speculative execution
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Real commit text tbd
-
-Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d71318e5f16371dbc0e89a786336a521551f8946)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- drivers/scsi/qla2xxx/qla_mr.c | 12 ++++++++----
- 1 file changed, 8 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c
-index 10b742d27e16..ca923d8803f9 100644
---- a/drivers/scsi/qla2xxx/qla_mr.c
-+++ b/drivers/scsi/qla2xxx/qla_mr.c
-@@ -2304,10 +2304,12 @@ qlafx00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
-       req = ha->req_q_map[que];
- 
-       /* Validate handle. */
--      if (handle < req->num_outstanding_cmds)
-+      if (handle < req->num_outstanding_cmds) {
-+              gmb();
-               sp = req->outstanding_cmds[handle];
--      else
-+      } else {
-               sp = NULL;
-+      }
- 
-       if (sp == NULL) {
-               ql_dbg(ql_dbg_io, vha, 0x3034,
-@@ -2655,10 +2657,12 @@ qlafx00_multistatus_entry(struct scsi_qla_host *vha,
-               req = ha->req_q_map[que];
- 
-               /* Validate handle. */
--              if (handle < req->num_outstanding_cmds)
-+              if (handle < req->num_outstanding_cmds) {
-+                      gmb();
-                       sp = req->outstanding_cmds[handle];
--              else
-+              } else {
-                       sp = NULL;
-+              }
- 
-               if (sp == NULL) {
-                       ql_dbg(ql_dbg_io, vha, 0x3044,
--- 
-2.14.2
-
diff --git a/patches/kernel/0266-cw1200-prevent-speculative-execution.patch b/patches/kernel/0266-cw1200-prevent-speculative-execution.patch

deleted file mode 100644 (file)

index 05a4767..0000000
--- a/patches/kernel/0266-cw1200-prevent-speculative-execution.patch
+++ /dev/null
@@ -1,38 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Elena Reshetova <elena.reshetova@intel.com>
-Date: Mon, 4 Sep 2017 13:11:50 +0300
-Subject: [PATCH] cw1200: prevent speculative execution
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Real commit text tbd
-
-Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 30770297508b781f2c1e82c52f793bc4d2cb2356)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- drivers/net/wireless/st/cw1200/sta.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/drivers/net/wireless/st/cw1200/sta.c b/drivers/net/wireless/st/cw1200/sta.c
-index a52224836a2b..bbff06a4263e 100644
---- a/drivers/net/wireless/st/cw1200/sta.c
-+++ b/drivers/net/wireless/st/cw1200/sta.c
-@@ -619,6 +619,7 @@ int cw1200_conf_tx(struct ieee80211_hw *dev, struct ieee80211_vif *vif,
-       mutex_lock(&priv->conf_mutex);
- 
-       if (queue < dev->queues) {
-+              gmb();
-               old_uapsd_flags = le16_to_cpu(priv->uapsd_info.uapsd_flags);
- 
-               WSM_TX_QUEUE_SET(&priv->tx_queue_params, queue, 0, 0, 0);
--- 
-2.14.2
-
diff --git a/patches/kernel/0266-qla2xxx-prevent-speculative-execution.patch b/patches/kernel/0266-qla2xxx-prevent-speculative-execution.patch

new file mode 100644 (file)

index 0000000..527c7f9
--- /dev/null
+++ b/patches/kernel/0266-qla2xxx-prevent-speculative-execution.patch
@@ -0,0 +1,60 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:49 +0300
+Subject: [PATCH] qla2xxx: prevent speculative execution
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d71318e5f16371dbc0e89a786336a521551f8946)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ drivers/scsi/qla2xxx/qla_mr.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c
+index 10b742d27e16..ca923d8803f9 100644
+--- a/drivers/scsi/qla2xxx/qla_mr.c
++++ b/drivers/scsi/qla2xxx/qla_mr.c
+@@ -2304,10 +2304,12 @@ qlafx00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
+       req = ha->req_q_map[que];
+ 
+       /* Validate handle. */
+-      if (handle < req->num_outstanding_cmds)
++      if (handle < req->num_outstanding_cmds) {
++              gmb();
+               sp = req->outstanding_cmds[handle];
+-      else
++      } else {
+               sp = NULL;
++      }
+ 
+       if (sp == NULL) {
+               ql_dbg(ql_dbg_io, vha, 0x3034,
+@@ -2655,10 +2657,12 @@ qlafx00_multistatus_entry(struct scsi_qla_host *vha,
+               req = ha->req_q_map[que];
+ 
+               /* Validate handle. */
+-              if (handle < req->num_outstanding_cmds)
++              if (handle < req->num_outstanding_cmds) {
++                      gmb();
+                       sp = req->outstanding_cmds[handle];
+-              else
++              } else {
+                       sp = NULL;
++              }
+ 
+               if (sp == NULL) {
+                       ql_dbg(ql_dbg_io, vha, 0x3044,
+-- 
+2.14.2
+
diff --git a/patches/kernel/0267-Thermal-int340x-prevent-speculative-execution.patch b/patches/kernel/0267-Thermal-int340x-prevent-speculative-execution.patch

deleted file mode 100644 (file)

index 58d560d..0000000
--- a/patches/kernel/0267-Thermal-int340x-prevent-speculative-execution.patch
+++ /dev/null
@@ -1,52 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Elena Reshetova <elena.reshetova@intel.com>
-Date: Mon, 4 Sep 2017 13:11:51 +0300
-Subject: [PATCH] Thermal/int340x: prevent speculative execution
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Real commit text tbd
-
-Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 3904f4cadeeaa9370f0635eb2f66194ca238325b)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- drivers/thermal/int340x_thermal/int340x_thermal_zone.c | 11 ++++++-----
- 1 file changed, 6 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/thermal/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/int340x_thermal/int340x_thermal_zone.c
-index 145a5c53ff5c..4f9917ef3c11 100644
---- a/drivers/thermal/int340x_thermal/int340x_thermal_zone.c
-+++ b/drivers/thermal/int340x_thermal/int340x_thermal_zone.c
-@@ -57,15 +57,16 @@ static int int340x_thermal_get_trip_temp(struct thermal_zone_device *zone,
-       if (d->override_ops && d->override_ops->get_trip_temp)
-               return d->override_ops->get_trip_temp(zone, trip, temp);
- 
--      if (trip < d->aux_trip_nr)
-+      if (trip < d->aux_trip_nr) {
-+              gmb();
-               *temp = d->aux_trips[trip];
--      else if (trip == d->crt_trip_id)
-+      } else if (trip == d->crt_trip_id) {
-               *temp = d->crt_temp;
--      else if (trip == d->psv_trip_id)
-+      } else if (trip == d->psv_trip_id) {
-               *temp = d->psv_temp;
--      else if (trip == d->hot_trip_id)
-+      } else if (trip == d->hot_trip_id) {
-               *temp = d->hot_temp;
--      else {
-+      } else {
-               for (i = 0; i < INT340X_THERMAL_MAX_ACT_TRIP_COUNT; i++) {
-                       if (d->act_trips[i].valid &&
-                           d->act_trips[i].id == trip) {
--- 
-2.14.2
-
diff --git a/patches/kernel/0267-cw1200-prevent-speculative-execution.patch b/patches/kernel/0267-cw1200-prevent-speculative-execution.patch

new file mode 100644 (file)

index 0000000..05a4767
--- /dev/null
+++ b/patches/kernel/0267-cw1200-prevent-speculative-execution.patch
@@ -0,0 +1,38 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:50 +0300
+Subject: [PATCH] cw1200: prevent speculative execution
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 30770297508b781f2c1e82c52f793bc4d2cb2356)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ drivers/net/wireless/st/cw1200/sta.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/wireless/st/cw1200/sta.c b/drivers/net/wireless/st/cw1200/sta.c
+index a52224836a2b..bbff06a4263e 100644
+--- a/drivers/net/wireless/st/cw1200/sta.c
++++ b/drivers/net/wireless/st/cw1200/sta.c
+@@ -619,6 +619,7 @@ int cw1200_conf_tx(struct ieee80211_hw *dev, struct ieee80211_vif *vif,
+       mutex_lock(&priv->conf_mutex);
+ 
+       if (queue < dev->queues) {
++              gmb();
+               old_uapsd_flags = le16_to_cpu(priv->uapsd_info.uapsd_flags);
+ 
+               WSM_TX_QUEUE_SET(&priv->tx_queue_params, queue, 0, 0, 0);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0268-Thermal-int340x-prevent-speculative-execution.patch b/patches/kernel/0268-Thermal-int340x-prevent-speculative-execution.patch

new file mode 100644 (file)

index 0000000..58d560d
--- /dev/null
+++ b/patches/kernel/0268-Thermal-int340x-prevent-speculative-execution.patch
@@ -0,0 +1,52 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:51 +0300
+Subject: [PATCH] Thermal/int340x: prevent speculative execution
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 3904f4cadeeaa9370f0635eb2f66194ca238325b)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ drivers/thermal/int340x_thermal/int340x_thermal_zone.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/thermal/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/int340x_thermal/int340x_thermal_zone.c
+index 145a5c53ff5c..4f9917ef3c11 100644
+--- a/drivers/thermal/int340x_thermal/int340x_thermal_zone.c
++++ b/drivers/thermal/int340x_thermal/int340x_thermal_zone.c
+@@ -57,15 +57,16 @@ static int int340x_thermal_get_trip_temp(struct thermal_zone_device *zone,
+       if (d->override_ops && d->override_ops->get_trip_temp)
+               return d->override_ops->get_trip_temp(zone, trip, temp);
+ 
+-      if (trip < d->aux_trip_nr)
++      if (trip < d->aux_trip_nr) {
++              gmb();
+               *temp = d->aux_trips[trip];
+-      else if (trip == d->crt_trip_id)
++      } else if (trip == d->crt_trip_id) {
+               *temp = d->crt_temp;
+-      else if (trip == d->psv_trip_id)
++      } else if (trip == d->psv_trip_id) {
+               *temp = d->psv_temp;
+-      else if (trip == d->hot_trip_id)
++      } else if (trip == d->hot_trip_id) {
+               *temp = d->hot_temp;
+-      else {
++      } else {
+               for (i = 0; i < INT340X_THERMAL_MAX_ACT_TRIP_COUNT; i++) {
+                       if (d->act_trips[i].valid &&
+                           d->act_trips[i].id == trip) {
+-- 
+2.14.2
+
diff --git a/patches/kernel/0268-userns-prevent-speculative-execution.patch b/patches/kernel/0268-userns-prevent-speculative-execution.patch

deleted file mode 100644 (file)

index 4854b18..0000000
--- a/patches/kernel/0268-userns-prevent-speculative-execution.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Elena Reshetova <elena.reshetova@intel.com>
-Date: Mon, 4 Sep 2017 13:11:52 +0300
-Subject: [PATCH] userns: prevent speculative execution
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Real commit text tbd
-
-Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 1410678db6238e625775f7108c68a9e5b8d439a1)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- kernel/user_namespace.c | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
-index 4eacf186f5bc..684cc69d431c 100644
---- a/kernel/user_namespace.c
-+++ b/kernel/user_namespace.c
-@@ -549,8 +549,10 @@ static void *m_start(struct seq_file *seq, loff_t *ppos,
-       struct uid_gid_extent *extent = NULL;
-       loff_t pos = *ppos;
- 
--      if (pos < map->nr_extents)
-+      if (pos < map->nr_extents) {
-+              gmb();
-               extent = &map->extent[pos];
-+      }
- 
-       return extent;
- }
--- 
-2.14.2
-
diff --git a/patches/kernel/0269-ipv6-prevent-speculative-execution.patch b/patches/kernel/0269-ipv6-prevent-speculative-execution.patch

deleted file mode 100644 (file)

index a5fa697..0000000
--- a/patches/kernel/0269-ipv6-prevent-speculative-execution.patch
+++ /dev/null
@@ -1,38 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Elena Reshetova <elena.reshetova@intel.com>
-Date: Mon, 4 Sep 2017 13:11:53 +0300
-Subject: [PATCH] ipv6: prevent speculative execution
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Real commit text tbd
-
-Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit fdb98114a31aa5c0083bd7cd5b42ea569b6f77dc)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- net/ipv6/raw.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
-index 60be012fe708..1a0eae661512 100644
---- a/net/ipv6/raw.c
-+++ b/net/ipv6/raw.c
-@@ -726,6 +726,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
-       if (offset < rfv->hlen) {
-               int copy = min(rfv->hlen - offset, len);
- 
-+              gmb();
-               if (skb->ip_summed == CHECKSUM_PARTIAL)
-                       memcpy(to, rfv->c + offset, copy);
-               else
--- 
-2.14.2
-
diff --git a/patches/kernel/0269-userns-prevent-speculative-execution.patch b/patches/kernel/0269-userns-prevent-speculative-execution.patch

new file mode 100644 (file)

index 0000000..4854b18
--- /dev/null
+++ b/patches/kernel/0269-userns-prevent-speculative-execution.patch
@@ -0,0 +1,42 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:52 +0300
+Subject: [PATCH] userns: prevent speculative execution
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 1410678db6238e625775f7108c68a9e5b8d439a1)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ kernel/user_namespace.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
+index 4eacf186f5bc..684cc69d431c 100644
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -549,8 +549,10 @@ static void *m_start(struct seq_file *seq, loff_t *ppos,
+       struct uid_gid_extent *extent = NULL;
+       loff_t pos = *ppos;
+ 
+-      if (pos < map->nr_extents)
++      if (pos < map->nr_extents) {
++              gmb();
+               extent = &map->extent[pos];
++      }
+ 
+       return extent;
+ }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0270-fs-prevent-speculative-execution.patch b/patches/kernel/0270-fs-prevent-speculative-execution.patch

deleted file mode 100644 (file)

index bf85225..0000000
--- a/patches/kernel/0270-fs-prevent-speculative-execution.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Elena Reshetova <elena.reshetova@intel.com>
-Date: Mon, 4 Sep 2017 13:11:54 +0300
-Subject: [PATCH] fs: prevent speculative execution
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Real commit text tbd
-
-Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 1ca9e14b253a501f055c3ea29d992c028473676e)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- include/linux/fdtable.h | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
-index 6e84b2cae6ad..09b124542bb8 100644
---- a/include/linux/fdtable.h
-+++ b/include/linux/fdtable.h
-@@ -81,8 +81,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i
- {
-       struct fdtable *fdt = rcu_dereference_raw(files->fdt);
- 
--      if (fd < fdt->max_fds)
-+      if (fd < fdt->max_fds) {
-+              gmb();
-               return rcu_dereference_raw(fdt->fd[fd]);
-+      }
-       return NULL;
- }
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0270-ipv6-prevent-speculative-execution.patch b/patches/kernel/0270-ipv6-prevent-speculative-execution.patch

new file mode 100644 (file)

index 0000000..a5fa697
--- /dev/null
+++ b/patches/kernel/0270-ipv6-prevent-speculative-execution.patch
@@ -0,0 +1,38 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:53 +0300
+Subject: [PATCH] ipv6: prevent speculative execution
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit fdb98114a31aa5c0083bd7cd5b42ea569b6f77dc)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ net/ipv6/raw.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
+index 60be012fe708..1a0eae661512 100644
+--- a/net/ipv6/raw.c
++++ b/net/ipv6/raw.c
+@@ -726,6 +726,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
+       if (offset < rfv->hlen) {
+               int copy = min(rfv->hlen - offset, len);
+ 
++              gmb();
+               if (skb->ip_summed == CHECKSUM_PARTIAL)
+                       memcpy(to, rfv->c + offset, copy);
+               else
+-- 
+2.14.2
+
diff --git a/patches/kernel/0271-fs-prevent-speculative-execution.patch b/patches/kernel/0271-fs-prevent-speculative-execution.patch

new file mode 100644 (file)

index 0000000..bf85225
--- /dev/null
+++ b/patches/kernel/0271-fs-prevent-speculative-execution.patch
@@ -0,0 +1,42 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:54 +0300
+Subject: [PATCH] fs: prevent speculative execution
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 1ca9e14b253a501f055c3ea29d992c028473676e)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ include/linux/fdtable.h | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
+index 6e84b2cae6ad..09b124542bb8 100644
+--- a/include/linux/fdtable.h
++++ b/include/linux/fdtable.h
+@@ -81,8 +81,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i
+ {
+       struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+ 
+-      if (fd < fdt->max_fds)
++      if (fd < fdt->max_fds) {
++              gmb();
+               return rcu_dereference_raw(fdt->fd[fd]);
++      }
+       return NULL;
+ }
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0271-net-mpls-prevent-speculative-execution.patch b/patches/kernel/0271-net-mpls-prevent-speculative-execution.patch

deleted file mode 100644 (file)

index cc840c9..0000000
--- a/patches/kernel/0271-net-mpls-prevent-speculative-execution.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Elena Reshetova <elena.reshetova@intel.com>
-Date: Mon, 4 Sep 2017 13:11:55 +0300
-Subject: [PATCH] net: mpls: prevent speculative execution
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Real commit text tbd
-
-Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 98a9550398f87c5430d5e893104e21caa1e2e8d3)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- net/mpls/af_mpls.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
-index ea4f481839dd..08dfb99e19f2 100644
---- a/net/mpls/af_mpls.c
-+++ b/net/mpls/af_mpls.c
-@@ -50,6 +50,8 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
-       if (index < net->mpls.platform_labels) {
-               struct mpls_route __rcu **platform_label =
-                       rcu_dereference(net->mpls.platform_label);
-+
-+              gmb();
-               rt = rcu_dereference(platform_label[index]);
-       }
-       return rt;
--- 
-2.14.2
-
diff --git a/patches/kernel/0272-net-mpls-prevent-speculative-execution.patch b/patches/kernel/0272-net-mpls-prevent-speculative-execution.patch

new file mode 100644 (file)

index 0000000..cc840c9
--- /dev/null
+++ b/patches/kernel/0272-net-mpls-prevent-speculative-execution.patch
@@ -0,0 +1,39 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:55 +0300
+Subject: [PATCH] net: mpls: prevent speculative execution
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 98a9550398f87c5430d5e893104e21caa1e2e8d3)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ net/mpls/af_mpls.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
+index ea4f481839dd..08dfb99e19f2 100644
+--- a/net/mpls/af_mpls.c
++++ b/net/mpls/af_mpls.c
+@@ -50,6 +50,8 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
+       if (index < net->mpls.platform_labels) {
+               struct mpls_route __rcu **platform_label =
+                       rcu_dereference(net->mpls.platform_label);
++
++              gmb();
+               rt = rcu_dereference(platform_label[index]);
+       }
+       return rt;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0272-udf-prevent-speculative-execution.patch b/patches/kernel/0272-udf-prevent-speculative-execution.patch

deleted file mode 100644 (file)

index 0287316..0000000
--- a/patches/kernel/0272-udf-prevent-speculative-execution.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Elena Reshetova <elena.reshetova@intel.com>
-Date: Mon, 4 Sep 2017 13:11:56 +0300
-Subject: [PATCH] udf: prevent speculative execution
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Real commit text tbd
-
-Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit f575840dd363aa80a14faacddf90b95db1185e2c)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- fs/udf/misc.c | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/fs/udf/misc.c b/fs/udf/misc.c
-index 3949c4bec3a3..4bd10b2e8540 100644
---- a/fs/udf/misc.c
-+++ b/fs/udf/misc.c
-@@ -104,6 +104,8 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size,
-                                       iinfo->i_lenEAttr) {
-                               uint32_t aal =
-                                       le32_to_cpu(eahd->appAttrLocation);
-+
-+                              gmb();
-                               memmove(&ea[offset - aal + size],
-                                       &ea[aal], offset - aal);
-                               offset -= aal;
-@@ -114,6 +116,8 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size,
-                                       iinfo->i_lenEAttr) {
-                               uint32_t ial =
-                                       le32_to_cpu(eahd->impAttrLocation);
-+
-+                              gmb();
-                               memmove(&ea[offset - ial + size],
-                                       &ea[ial], offset - ial);
-                               offset -= ial;
-@@ -125,6 +129,8 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size,
-                                       iinfo->i_lenEAttr) {
-                               uint32_t aal =
-                                       le32_to_cpu(eahd->appAttrLocation);
-+
-+                              gmb();
-                               memmove(&ea[offset - aal + size],
-                                       &ea[aal], offset - aal);
-                               offset -= aal;
--- 
-2.14.2
-
diff --git a/patches/kernel/0273-udf-prevent-speculative-execution.patch b/patches/kernel/0273-udf-prevent-speculative-execution.patch

new file mode 100644 (file)

index 0000000..0287316
--- /dev/null
+++ b/patches/kernel/0273-udf-prevent-speculative-execution.patch
@@ -0,0 +1,57 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Elena Reshetova <elena.reshetova@intel.com>
+Date: Mon, 4 Sep 2017 13:11:56 +0300
+Subject: [PATCH] udf: prevent speculative execution
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Real commit text tbd
+
+Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit f575840dd363aa80a14faacddf90b95db1185e2c)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ fs/udf/misc.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/fs/udf/misc.c b/fs/udf/misc.c
+index 3949c4bec3a3..4bd10b2e8540 100644
+--- a/fs/udf/misc.c
++++ b/fs/udf/misc.c
+@@ -104,6 +104,8 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size,
+                                       iinfo->i_lenEAttr) {
+                               uint32_t aal =
+                                       le32_to_cpu(eahd->appAttrLocation);
++
++                              gmb();
+                               memmove(&ea[offset - aal + size],
+                                       &ea[aal], offset - aal);
+                               offset -= aal;
+@@ -114,6 +116,8 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size,
+                                       iinfo->i_lenEAttr) {
+                               uint32_t ial =
+                                       le32_to_cpu(eahd->impAttrLocation);
++
++                              gmb();
+                               memmove(&ea[offset - ial + size],
+                                       &ea[ial], offset - ial);
+                               offset -= ial;
+@@ -125,6 +129,8 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size,
+                                       iinfo->i_lenEAttr) {
+                               uint32_t aal =
+                                       le32_to_cpu(eahd->appAttrLocation);
++
++                              gmb();
+                               memmove(&ea[offset - aal + size],
+                                       &ea[aal], offset - aal);
+                               offset -= aal;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0273-x86-feature-Enable-the-x86-feature-to-control-Specul.patch b/patches/kernel/0273-x86-feature-Enable-the-x86-feature-to-control-Specul.patch

deleted file mode 100644 (file)

index 2284c1b..0000000
--- a/patches/kernel/0273-x86-feature-Enable-the-x86-feature-to-control-Specul.patch
+++ /dev/null
@@ -1,77 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Thu, 24 Aug 2017 09:34:41 -0700
-Subject: [PATCH] x86/feature: Enable the x86 feature to control Speculation
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-cpuid ax=0x7, return rdx bit 26 to indicate presence of this feature
-IA32_SPEC_CTRL (0x48) and IA32_PRED_CMD (0x49)
-IA32_SPEC_CTRL, bit0 – Indirect Branch Restricted Speculation (IBRS)
-IA32_PRED_CMD,  bit0 – Indirect Branch Prediction Barrier (IBPB)
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit f1f160a92b70c25d6e6e76788463bbec86a73313)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/cpufeatures.h | 1 +
- arch/x86/include/asm/msr-index.h   | 5 +++++
- arch/x86/kernel/cpu/scattered.c    | 1 +
- 3 files changed, 7 insertions(+)
-
-diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
-index 3928050b51b0..44be8fd069bf 100644
---- a/arch/x86/include/asm/cpufeatures.h
-+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -208,6 +208,7 @@
- #define X86_FEATURE_AVX512_4FMAPS     ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
- 
- #define X86_FEATURE_MBA                       ( 7*32+18) /* Memory Bandwidth Allocation */
-+#define X86_FEATURE_SPEC_CTRL         ( 7*32+19) /* Control Speculation Control */
- 
- /* Virtualization flags: Linux defined, word 8 */
- #define X86_FEATURE_TPR_SHADOW                ( 8*32+ 0) /* Intel TPR Shadow */
-diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
-index db88b7f852b4..4e3438a00a50 100644
---- a/arch/x86/include/asm/msr-index.h
-+++ b/arch/x86/include/asm/msr-index.h
-@@ -41,6 +41,9 @@
- #define MSR_PPIN_CTL                  0x0000004e
- #define MSR_PPIN                      0x0000004f
- 
-+#define MSR_IA32_SPEC_CTRL            0x00000048
-+#define MSR_IA32_PRED_CMD             0x00000049
-+
- #define MSR_IA32_PERFCTR0             0x000000c1
- #define MSR_IA32_PERFCTR1             0x000000c2
- #define MSR_FSB_FREQ                  0x000000cd
-@@ -437,6 +440,8 @@
- #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX      (1<<1)
- #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX     (1<<2)
- #define FEATURE_CONTROL_LMCE                          (1<<20)
-+#define FEATURE_ENABLE_IBRS                           (1<<0)
-+#define FEATURE_SET_IBPB                              (1<<0)
- 
- #define MSR_IA32_APICBASE             0x0000001b
- #define MSR_IA32_APICBASE_BSP         (1<<8)
-diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
-index 23c23508c012..9651ea395812 100644
---- a/arch/x86/kernel/cpu/scattered.c
-+++ b/arch/x86/kernel/cpu/scattered.c
-@@ -24,6 +24,7 @@ static const struct cpuid_bit cpuid_bits[] = {
-       { X86_FEATURE_INTEL_PT,         CPUID_EBX, 25, 0x00000007, 0 },
-       { X86_FEATURE_AVX512_4VNNIW,    CPUID_EDX,  2, 0x00000007, 0 },
-       { X86_FEATURE_AVX512_4FMAPS,    CPUID_EDX,  3, 0x00000007, 0 },
-+      { X86_FEATURE_SPEC_CTRL,        CPUID_EDX, 26, 0x00000007, 0 },
-       { X86_FEATURE_CAT_L3,           CPUID_EBX,  1, 0x00000010, 0 },
-       { X86_FEATURE_CAT_L2,           CPUID_EBX,  2, 0x00000010, 0 },
-       { X86_FEATURE_CDP_L3,           CPUID_ECX,  2, 0x00000010, 1 },
--- 
-2.14.2
-
diff --git a/patches/kernel/0274-x86-feature-Enable-the-x86-feature-to-control-Specul.patch b/patches/kernel/0274-x86-feature-Enable-the-x86-feature-to-control-Specul.patch

new file mode 100644 (file)

index 0000000..2284c1b
--- /dev/null
+++ b/patches/kernel/0274-x86-feature-Enable-the-x86-feature-to-control-Specul.patch
@@ -0,0 +1,77 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Thu, 24 Aug 2017 09:34:41 -0700
+Subject: [PATCH] x86/feature: Enable the x86 feature to control Speculation
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+cpuid ax=0x7, return rdx bit 26 to indicate presence of this feature
+IA32_SPEC_CTRL (0x48) and IA32_PRED_CMD (0x49)
+IA32_SPEC_CTRL, bit0 – Indirect Branch Restricted Speculation (IBRS)
+IA32_PRED_CMD,  bit0 – Indirect Branch Prediction Barrier (IBPB)
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit f1f160a92b70c25d6e6e76788463bbec86a73313)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/cpufeatures.h | 1 +
+ arch/x86/include/asm/msr-index.h   | 5 +++++
+ arch/x86/kernel/cpu/scattered.c    | 1 +
+ 3 files changed, 7 insertions(+)
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 3928050b51b0..44be8fd069bf 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -208,6 +208,7 @@
+ #define X86_FEATURE_AVX512_4FMAPS     ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+ 
+ #define X86_FEATURE_MBA                       ( 7*32+18) /* Memory Bandwidth Allocation */
++#define X86_FEATURE_SPEC_CTRL         ( 7*32+19) /* Control Speculation Control */
+ 
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW                ( 8*32+ 0) /* Intel TPR Shadow */
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index db88b7f852b4..4e3438a00a50 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -41,6 +41,9 @@
+ #define MSR_PPIN_CTL                  0x0000004e
+ #define MSR_PPIN                      0x0000004f
+ 
++#define MSR_IA32_SPEC_CTRL            0x00000048
++#define MSR_IA32_PRED_CMD             0x00000049
++
+ #define MSR_IA32_PERFCTR0             0x000000c1
+ #define MSR_IA32_PERFCTR1             0x000000c2
+ #define MSR_FSB_FREQ                  0x000000cd
+@@ -437,6 +440,8 @@
+ #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX      (1<<1)
+ #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX     (1<<2)
+ #define FEATURE_CONTROL_LMCE                          (1<<20)
++#define FEATURE_ENABLE_IBRS                           (1<<0)
++#define FEATURE_SET_IBPB                              (1<<0)
+ 
+ #define MSR_IA32_APICBASE             0x0000001b
+ #define MSR_IA32_APICBASE_BSP         (1<<8)
+diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
+index 23c23508c012..9651ea395812 100644
+--- a/arch/x86/kernel/cpu/scattered.c
++++ b/arch/x86/kernel/cpu/scattered.c
+@@ -24,6 +24,7 @@ static const struct cpuid_bit cpuid_bits[] = {
+       { X86_FEATURE_INTEL_PT,         CPUID_EBX, 25, 0x00000007, 0 },
+       { X86_FEATURE_AVX512_4VNNIW,    CPUID_EDX,  2, 0x00000007, 0 },
+       { X86_FEATURE_AVX512_4FMAPS,    CPUID_EDX,  3, 0x00000007, 0 },
++      { X86_FEATURE_SPEC_CTRL,        CPUID_EDX, 26, 0x00000007, 0 },
+       { X86_FEATURE_CAT_L3,           CPUID_EBX,  1, 0x00000010, 0 },
+       { X86_FEATURE_CAT_L2,           CPUID_EBX,  2, 0x00000010, 0 },
+       { X86_FEATURE_CDP_L3,           CPUID_ECX,  2, 0x00000010, 1 },
+-- 
+2.14.2
+
diff --git a/patches/kernel/0274-x86-feature-Report-presence-of-IBPB-and-IBRS-control.patch b/patches/kernel/0274-x86-feature-Report-presence-of-IBPB-and-IBRS-control.patch

deleted file mode 100644 (file)

index f4944f4..0000000
--- a/patches/kernel/0274-x86-feature-Report-presence-of-IBPB-and-IBRS-control.patch
+++ /dev/null
@@ -1,41 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Wed, 27 Sep 2017 12:09:14 -0700
-Subject: [PATCH] x86/feature: Report presence of IBPB and IBRS control
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Report presence of IBPB and IBRS.
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit c41156d893e7f48bebf8d71cfddd39d8fb2724f8)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/cpu/intel.c | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
-index dfa90a3a5145..f1d94c73625a 100644
---- a/arch/x86/kernel/cpu/intel.c
-+++ b/arch/x86/kernel/cpu/intel.c
-@@ -627,6 +627,11 @@ static void init_intel(struct cpuinfo_x86 *c)
-       init_intel_energy_perf(c);
- 
-       init_intel_misc_features(c);
-+
-+      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+                printk_once(KERN_INFO "FEATURE SPEC_CTRL Present\n");
-+        else
-+                printk_once(KERN_INFO "FEATURE SPEC_CTRL Not Present\n");
- }
- 
- #ifdef CONFIG_X86_32
--- 
-2.14.2
-
diff --git a/patches/kernel/0275-x86-enter-MACROS-to-set-clear-IBRS-and-set-IBPB.patch b/patches/kernel/0275-x86-enter-MACROS-to-set-clear-IBRS-and-set-IBPB.patch

deleted file mode 100644 (file)

index f150ffa..0000000
--- a/patches/kernel/0275-x86-enter-MACROS-to-set-clear-IBRS-and-set-IBPB.patch
+++ /dev/null
@@ -1,84 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Fri, 15 Sep 2017 18:04:53 -0700
-Subject: [PATCH] x86/enter: MACROS to set/clear IBRS and set IBPB
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Setup macros to control IBRS and IBPB
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 171d754fe3b783d361555cf2569e68a7b0e0d54a)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/spec_ctrl.h | 52 ++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 52 insertions(+)
- create mode 100644 arch/x86/include/asm/spec_ctrl.h
-
-diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
-new file mode 100644
-index 000000000000..7f8bb09b6acb
---- /dev/null
-+++ b/arch/x86/include/asm/spec_ctrl.h
-@@ -0,0 +1,52 @@
-+#ifndef _ASM_X86_SPEC_CTRL_H
-+#define _ASM_X86_SPEC_CTRL_H
-+
-+#include <linux/stringify.h>
-+#include <asm/msr-index.h>
-+#include <asm/cpufeatures.h>
-+#include <asm/alternative-asm.h>
-+
-+#ifdef __ASSEMBLY__
-+
-+#define __ASM_ENABLE_IBRS                     \
-+      pushq %rax;                             \
-+      pushq %rcx;                             \
-+      pushq %rdx;                             \
-+      movl $MSR_IA32_SPEC_CTRL, %ecx;         \
-+      movl $0, %edx;                          \
-+      movl $FEATURE_ENABLE_IBRS, %eax;        \
-+      wrmsr;                                  \
-+      popq %rdx;                              \
-+      popq %rcx;                              \
-+      popq %rax
-+#define __ASM_ENABLE_IBRS_CLOBBER             \
-+      movl $MSR_IA32_SPEC_CTRL, %ecx;         \
-+      movl $0, %edx;                          \
-+      movl $FEATURE_ENABLE_IBRS, %eax;        \
-+      wrmsr;
-+#define __ASM_DISABLE_IBRS                    \
-+      pushq %rax;                             \
-+      pushq %rcx;                             \
-+      pushq %rdx;                             \
-+      movl $MSR_IA32_SPEC_CTRL, %ecx;         \
-+      movl $0, %edx;                          \
-+      movl $0, %eax;                          \
-+      wrmsr;                                  \
-+      popq %rdx;                              \
-+      popq %rcx;                              \
-+      popq %rax
-+
-+.macro ENABLE_IBRS
-+ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS), X86_FEATURE_SPEC_CTRL
-+.endm
-+
-+.macro ENABLE_IBRS_CLOBBER
-+ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS_CLOBBER), X86_FEATURE_SPEC_CTRL
-+.endm
-+
-+.macro DISABLE_IBRS
-+ALTERNATIVE "", __stringify(__ASM_DISABLE_IBRS), X86_FEATURE_SPEC_CTRL
-+.endm
-+
-+#endif /* __ASSEMBLY__ */
-+#endif /* _ASM_X86_SPEC_CTRL_H */
--- 
-2.14.2
-
diff --git a/patches/kernel/0275-x86-feature-Report-presence-of-IBPB-and-IBRS-control.patch b/patches/kernel/0275-x86-feature-Report-presence-of-IBPB-and-IBRS-control.patch

new file mode 100644 (file)

index 0000000..f4944f4
--- /dev/null
+++ b/patches/kernel/0275-x86-feature-Report-presence-of-IBPB-and-IBRS-control.patch
@@ -0,0 +1,41 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Wed, 27 Sep 2017 12:09:14 -0700
+Subject: [PATCH] x86/feature: Report presence of IBPB and IBRS control
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Report presence of IBPB and IBRS.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit c41156d893e7f48bebf8d71cfddd39d8fb2724f8)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/cpu/intel.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
+index dfa90a3a5145..f1d94c73625a 100644
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -627,6 +627,11 @@ static void init_intel(struct cpuinfo_x86 *c)
+       init_intel_energy_perf(c);
+ 
+       init_intel_misc_features(c);
++
++      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++                printk_once(KERN_INFO "FEATURE SPEC_CTRL Present\n");
++        else
++                printk_once(KERN_INFO "FEATURE SPEC_CTRL Not Present\n");
+ }
+ 
+ #ifdef CONFIG_X86_32
+-- 
+2.14.2
+
diff --git a/patches/kernel/0276-x86-enter-MACROS-to-set-clear-IBRS-and-set-IBPB.patch b/patches/kernel/0276-x86-enter-MACROS-to-set-clear-IBRS-and-set-IBPB.patch

new file mode 100644 (file)

index 0000000..f150ffa
--- /dev/null
+++ b/patches/kernel/0276-x86-enter-MACROS-to-set-clear-IBRS-and-set-IBPB.patch
@@ -0,0 +1,84 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Fri, 15 Sep 2017 18:04:53 -0700
+Subject: [PATCH] x86/enter: MACROS to set/clear IBRS and set IBPB
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Setup macros to control IBRS and IBPB
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 171d754fe3b783d361555cf2569e68a7b0e0d54a)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/spec_ctrl.h | 52 ++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 52 insertions(+)
+ create mode 100644 arch/x86/include/asm/spec_ctrl.h
+
+diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
+new file mode 100644
+index 000000000000..7f8bb09b6acb
+--- /dev/null
++++ b/arch/x86/include/asm/spec_ctrl.h
+@@ -0,0 +1,52 @@
++#ifndef _ASM_X86_SPEC_CTRL_H
++#define _ASM_X86_SPEC_CTRL_H
++
++#include <linux/stringify.h>
++#include <asm/msr-index.h>
++#include <asm/cpufeatures.h>
++#include <asm/alternative-asm.h>
++
++#ifdef __ASSEMBLY__
++
++#define __ASM_ENABLE_IBRS                     \
++      pushq %rax;                             \
++      pushq %rcx;                             \
++      pushq %rdx;                             \
++      movl $MSR_IA32_SPEC_CTRL, %ecx;         \
++      movl $0, %edx;                          \
++      movl $FEATURE_ENABLE_IBRS, %eax;        \
++      wrmsr;                                  \
++      popq %rdx;                              \
++      popq %rcx;                              \
++      popq %rax
++#define __ASM_ENABLE_IBRS_CLOBBER             \
++      movl $MSR_IA32_SPEC_CTRL, %ecx;         \
++      movl $0, %edx;                          \
++      movl $FEATURE_ENABLE_IBRS, %eax;        \
++      wrmsr;
++#define __ASM_DISABLE_IBRS                    \
++      pushq %rax;                             \
++      pushq %rcx;                             \
++      pushq %rdx;                             \
++      movl $MSR_IA32_SPEC_CTRL, %ecx;         \
++      movl $0, %edx;                          \
++      movl $0, %eax;                          \
++      wrmsr;                                  \
++      popq %rdx;                              \
++      popq %rcx;                              \
++      popq %rax
++
++.macro ENABLE_IBRS
++ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS), X86_FEATURE_SPEC_CTRL
++.endm
++
++.macro ENABLE_IBRS_CLOBBER
++ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS_CLOBBER), X86_FEATURE_SPEC_CTRL
++.endm
++
++.macro DISABLE_IBRS
++ALTERNATIVE "", __stringify(__ASM_DISABLE_IBRS), X86_FEATURE_SPEC_CTRL
++.endm
++
++#endif /* __ASSEMBLY__ */
++#endif /* _ASM_X86_SPEC_CTRL_H */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0276-x86-enter-Use-IBRS-on-syscall-and-interrupts.patch b/patches/kernel/0276-x86-enter-Use-IBRS-on-syscall-and-interrupts.patch

deleted file mode 100644 (file)

index 68f96e7..0000000
--- a/patches/kernel/0276-x86-enter-Use-IBRS-on-syscall-and-interrupts.patch
+++ /dev/null
@@ -1,171 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Fri, 13 Oct 2017 14:25:00 -0700
-Subject: [PATCH] x86/enter: Use IBRS on syscall and interrupts
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Set IBRS upon kernel entrance via syscall and interrupts. Clear it upon exit.
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d7eb5f9ed26dbdc39df793491bdcc9f80d41325e)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S        | 18 +++++++++++++++++-
- arch/x86/entry/entry_64_compat.S |  7 +++++++
- 2 files changed, 24 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index b48f2c78a9bf..5f898c3c1dad 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -36,6 +36,7 @@
- #include <asm/pgtable_types.h>
- #include <asm/export.h>
- #include <asm/frame.h>
-+#include <asm/spec_ctrl.h>
- #include <linux/err.h>
- 
- #include "calling.h"
-@@ -235,6 +236,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
-       sub     $(6*8), %rsp                    /* pt_regs->bp, bx, r12-15 not saved */
-       UNWIND_HINT_REGS extra=0
- 
-+      ENABLE_IBRS
-+
-       /*
-        * If we need to do entry work or if we guess we'll need to do
-        * exit work, go straight to the slow path.
-@@ -286,6 +289,7 @@ entry_SYSCALL_64_fastpath:
-       TRACE_IRQS_ON           /* user mode is traced as IRQs on */
-       movq    RIP(%rsp), %rcx
-       movq    EFLAGS(%rsp), %r11
-+      DISABLE_IBRS
-       addq    $6*8, %rsp      /* skip extra regs -- they were preserved */
-       UNWIND_HINT_EMPTY
-       jmp     .Lpop_c_regs_except_rcx_r11_and_sysret
-@@ -379,6 +383,8 @@ return_from_SYSCALL_64:
-        * perf profiles. Nothing jumps here.
-        */
- syscall_return_via_sysret:
-+      DISABLE_IBRS
-+
-       /* rcx and r11 are already restored (see code above) */
-       UNWIND_HINT_EMPTY
-       POP_EXTRA_REGS
-@@ -660,6 +666,10 @@ END(irq_entries_start)
-       /*
-        * IRQ from user mode.
-        *
-+       */
-+      ENABLE_IBRS
-+
-+      /*
-        * We need to tell lockdep that IRQs are off.  We can't do this until
-        * we fix gsbase, and we should do it before enter_from_user_mode
-        * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
-@@ -743,7 +753,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
-        * We are on the trampoline stack.  All regs except RDI are live.
-        * We can do future final exit work right here.
-        */
--
-+      DISABLE_IBRS
-       SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
- 
-       /* Restore RDI. */
-@@ -1277,6 +1287,7 @@ ENTRY(paranoid_entry)
- 
- 1:
-       SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
-+      ENABLE_IBRS_CLOBBER
- 
-       ret
- END(paranoid_entry)
-@@ -1331,6 +1342,8 @@ ENTRY(error_entry)
-       /* We have user CR3.  Change to kernel CR3. */
-       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
- 
-+      ENABLE_IBRS
-+
- .Lerror_entry_from_usermode_after_swapgs:
-       /* Put us onto the real thread stack. */
-       popq    %r12                            /* save return addr in %12 */
-@@ -1377,6 +1390,7 @@ ENTRY(error_entry)
-        */
-       SWAPGS
-       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
-+      ENABLE_IBRS_CLOBBER
-       jmp .Lerror_entry_done
- 
- .Lbstep_iret:
-@@ -1391,6 +1405,7 @@ ENTRY(error_entry)
-        */
-       SWAPGS
-       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
-+      ENABLE_IBRS_CLOBBER
- 
-       /*
-        * Pretend that the exception came from user mode: set up pt_regs
-@@ -1518,6 +1533,7 @@ ENTRY(nmi)
-       UNWIND_HINT_REGS
-       ENCODE_FRAME_POINTER
- 
-+      ENABLE_IBRS
-       /*
-        * At this point we no longer need to worry about stack damage
-        * due to nesting -- we're on the normal thread stack and we're
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
-index 2b5e7685823c..ee4f3edb3c50 100644
---- a/arch/x86/entry/entry_64_compat.S
-+++ b/arch/x86/entry/entry_64_compat.S
-@@ -13,6 +13,7 @@
- #include <asm/irqflags.h>
- #include <asm/asm.h>
- #include <asm/smap.h>
-+#include <asm/spec_ctrl.h>
- #include <linux/linkage.h>
- #include <linux/err.h>
- 
-@@ -95,6 +96,8 @@ ENTRY(entry_SYSENTER_compat)
-       pushq   $0                      /* pt_regs->r15 = 0 */
-       cld
- 
-+      ENABLE_IBRS
-+
-       /*
-        * SYSENTER doesn't filter flags, so we need to clear NT and AC
-        * ourselves.  To save a few cycles, we can check whether
-@@ -194,6 +197,7 @@ ENTRY(entry_SYSCALL_compat)
- 
-       /* Use %rsp as scratch reg. User ESP is stashed in r8 */
-       SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
-+      ENABLE_IBRS
- 
-       /* Switch to the kernel stack */
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-@@ -249,6 +253,7 @@ sysret32_from_system_call:
-       popq    %rsi                    /* pt_regs->si */
-       popq    %rdi                    /* pt_regs->di */
- 
-+      DISABLE_IBRS
-         /*
-          * USERGS_SYSRET32 does:
-          *  GSBASE = user's GS base
-@@ -348,6 +353,8 @@ ENTRY(entry_INT80_compat)
-       pushq   %r15                    /* pt_regs->r15 */
-       cld
- 
-+      ENABLE_IBRS
-+
-       /*
-        * User mode is traced as though IRQs are on, and the interrupt
-        * gate turned them off.
--- 
-2.14.2
-
diff --git a/patches/kernel/0277-x86-enter-Use-IBRS-on-syscall-and-interrupts.patch b/patches/kernel/0277-x86-enter-Use-IBRS-on-syscall-and-interrupts.patch

new file mode 100644 (file)

index 0000000..68f96e7
--- /dev/null
+++ b/patches/kernel/0277-x86-enter-Use-IBRS-on-syscall-and-interrupts.patch
@@ -0,0 +1,171 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Fri, 13 Oct 2017 14:25:00 -0700
+Subject: [PATCH] x86/enter: Use IBRS on syscall and interrupts
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Set IBRS upon kernel entrance via syscall and interrupts. Clear it upon exit.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d7eb5f9ed26dbdc39df793491bdcc9f80d41325e)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S        | 18 +++++++++++++++++-
+ arch/x86/entry/entry_64_compat.S |  7 +++++++
+ 2 files changed, 24 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index b48f2c78a9bf..5f898c3c1dad 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -36,6 +36,7 @@
+ #include <asm/pgtable_types.h>
+ #include <asm/export.h>
+ #include <asm/frame.h>
++#include <asm/spec_ctrl.h>
+ #include <linux/err.h>
+ 
+ #include "calling.h"
+@@ -235,6 +236,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
+       sub     $(6*8), %rsp                    /* pt_regs->bp, bx, r12-15 not saved */
+       UNWIND_HINT_REGS extra=0
+ 
++      ENABLE_IBRS
++
+       /*
+        * If we need to do entry work or if we guess we'll need to do
+        * exit work, go straight to the slow path.
+@@ -286,6 +289,7 @@ entry_SYSCALL_64_fastpath:
+       TRACE_IRQS_ON           /* user mode is traced as IRQs on */
+       movq    RIP(%rsp), %rcx
+       movq    EFLAGS(%rsp), %r11
++      DISABLE_IBRS
+       addq    $6*8, %rsp      /* skip extra regs -- they were preserved */
+       UNWIND_HINT_EMPTY
+       jmp     .Lpop_c_regs_except_rcx_r11_and_sysret
+@@ -379,6 +383,8 @@ return_from_SYSCALL_64:
+        * perf profiles. Nothing jumps here.
+        */
+ syscall_return_via_sysret:
++      DISABLE_IBRS
++
+       /* rcx and r11 are already restored (see code above) */
+       UNWIND_HINT_EMPTY
+       POP_EXTRA_REGS
+@@ -660,6 +666,10 @@ END(irq_entries_start)
+       /*
+        * IRQ from user mode.
+        *
++       */
++      ENABLE_IBRS
++
++      /*
+        * We need to tell lockdep that IRQs are off.  We can't do this until
+        * we fix gsbase, and we should do it before enter_from_user_mode
+        * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
+@@ -743,7 +753,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+        * We are on the trampoline stack.  All regs except RDI are live.
+        * We can do future final exit work right here.
+        */
+-
++      DISABLE_IBRS
+       SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+ 
+       /* Restore RDI. */
+@@ -1277,6 +1287,7 @@ ENTRY(paranoid_entry)
+ 
+ 1:
+       SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
++      ENABLE_IBRS_CLOBBER
+ 
+       ret
+ END(paranoid_entry)
+@@ -1331,6 +1342,8 @@ ENTRY(error_entry)
+       /* We have user CR3.  Change to kernel CR3. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ 
++      ENABLE_IBRS
++
+ .Lerror_entry_from_usermode_after_swapgs:
+       /* Put us onto the real thread stack. */
+       popq    %r12                            /* save return addr in %12 */
+@@ -1377,6 +1390,7 @@ ENTRY(error_entry)
+        */
+       SWAPGS
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++      ENABLE_IBRS_CLOBBER
+       jmp .Lerror_entry_done
+ 
+ .Lbstep_iret:
+@@ -1391,6 +1405,7 @@ ENTRY(error_entry)
+        */
+       SWAPGS
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++      ENABLE_IBRS_CLOBBER
+ 
+       /*
+        * Pretend that the exception came from user mode: set up pt_regs
+@@ -1518,6 +1533,7 @@ ENTRY(nmi)
+       UNWIND_HINT_REGS
+       ENCODE_FRAME_POINTER
+ 
++      ENABLE_IBRS
+       /*
+        * At this point we no longer need to worry about stack damage
+        * due to nesting -- we're on the normal thread stack and we're
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index 2b5e7685823c..ee4f3edb3c50 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -13,6 +13,7 @@
+ #include <asm/irqflags.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/spec_ctrl.h>
+ #include <linux/linkage.h>
+ #include <linux/err.h>
+ 
+@@ -95,6 +96,8 @@ ENTRY(entry_SYSENTER_compat)
+       pushq   $0                      /* pt_regs->r15 = 0 */
+       cld
+ 
++      ENABLE_IBRS
++
+       /*
+        * SYSENTER doesn't filter flags, so we need to clear NT and AC
+        * ourselves.  To save a few cycles, we can check whether
+@@ -194,6 +197,7 @@ ENTRY(entry_SYSCALL_compat)
+ 
+       /* Use %rsp as scratch reg. User ESP is stashed in r8 */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
++      ENABLE_IBRS
+ 
+       /* Switch to the kernel stack */
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+@@ -249,6 +253,7 @@ sysret32_from_system_call:
+       popq    %rsi                    /* pt_regs->si */
+       popq    %rdi                    /* pt_regs->di */
+ 
++      DISABLE_IBRS
+         /*
+          * USERGS_SYSRET32 does:
+          *  GSBASE = user's GS base
+@@ -348,6 +353,8 @@ ENTRY(entry_INT80_compat)
+       pushq   %r15                    /* pt_regs->r15 */
+       cld
+ 
++      ENABLE_IBRS
++
+       /*
+        * User mode is traced as though IRQs are on, and the interrupt
+        * gate turned them off.
+-- 
+2.14.2
+
diff --git a/patches/kernel/0277-x86-idle-Disable-IBRS-entering-idle-and-enable-it-on.patch b/patches/kernel/0277-x86-idle-Disable-IBRS-entering-idle-and-enable-it-on.patch

deleted file mode 100644 (file)

index cc4b348..0000000
--- a/patches/kernel/0277-x86-idle-Disable-IBRS-entering-idle-and-enable-it-on.patch
+++ /dev/null
@@ -1,117 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Mon, 6 Nov 2017 18:19:14 -0800
-Subject: [PATCH] x86/idle: Disable IBRS entering idle and enable it on wakeup
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Clear IBRS on idle entry and set it on idle exit into kernel on mwait.
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 5521b04afda1d683c1ebad6c25c2529a88e6f061)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/mwait.h |  8 ++++++++
- arch/x86/kernel/process.c    | 12 ++++++++++--
- arch/x86/lib/delay.c         | 10 ++++++++++
- 3 files changed, 28 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
-index bda3c27f0da0..f15120ada161 100644
---- a/arch/x86/include/asm/mwait.h
-+++ b/arch/x86/include/asm/mwait.h
-@@ -5,6 +5,8 @@
- #include <linux/sched/idle.h>
- 
- #include <asm/cpufeature.h>
-+#include <asm/spec_ctrl.h>
-+#include <asm/microcode.h>
- 
- #define MWAIT_SUBSTATE_MASK           0xf
- #define MWAIT_CSTATE_MASK             0xf
-@@ -105,9 +107,15 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
-                       mb();
-               }
- 
-+              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+                      native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
-+
-               __monitor((void *)&current_thread_info()->flags, 0, 0);
-               if (!need_resched())
-                       __mwait(eax, ecx);
-+
-+              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+                      native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
-       }
-       current_clr_polling();
- }
-diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
-index 07e6218ad7d9..3adb3806a284 100644
---- a/arch/x86/kernel/process.c
-+++ b/arch/x86/kernel/process.c
-@@ -447,11 +447,19 @@ static __cpuidle void mwait_idle(void)
-                       mb(); /* quirk */
-               }
- 
-+              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+                        native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
-+
-               __monitor((void *)&current_thread_info()->flags, 0, 0);
--              if (!need_resched())
-+              if (!need_resched()) {
-                       __sti_mwait(0, 0);
--              else
-+                      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+                              native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
-+              } else {
-+                      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+                              native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
-                       local_irq_enable();
-+              }
-               trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
-       } else {
-               local_irq_enable();
-diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
-index cf2ac227c2ac..b088463973e4 100644
---- a/arch/x86/lib/delay.c
-+++ b/arch/x86/lib/delay.c
-@@ -26,6 +26,8 @@
- # include <asm/smp.h>
- #endif
- 
-+#define IBRS_DISABLE_THRESHOLD        1000
-+
- /* simple loop based delay: */
- static void delay_loop(unsigned long loops)
- {
-@@ -105,6 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
-       for (;;) {
-               delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
- 
-+              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
-+                      (delay > IBRS_DISABLE_THRESHOLD))
-+                      native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
-+
-               /*
-                * Use cpu_tss_rw as a cacheline-aligned, seldomly
-                * accessed per-cpu variable as the monitor target.
-@@ -118,6 +124,10 @@ static void delay_mwaitx(unsigned long __loops)
-                */
-               __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
- 
-+              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
-+                      (delay > IBRS_DISABLE_THRESHOLD))
-+                      native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
-+
-               end = rdtsc_ordered();
- 
-               if (loops <= end - start)
--- 
-2.14.2
-
diff --git a/patches/kernel/0278-x86-idle-Disable-IBRS-entering-idle-and-enable-it-on.patch b/patches/kernel/0278-x86-idle-Disable-IBRS-entering-idle-and-enable-it-on.patch

new file mode 100644 (file)

index 0000000..cc4b348
--- /dev/null
+++ b/patches/kernel/0278-x86-idle-Disable-IBRS-entering-idle-and-enable-it-on.patch
@@ -0,0 +1,117 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Mon, 6 Nov 2017 18:19:14 -0800
+Subject: [PATCH] x86/idle: Disable IBRS entering idle and enable it on wakeup
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Clear IBRS on idle entry and set it on idle exit into kernel on mwait.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 5521b04afda1d683c1ebad6c25c2529a88e6f061)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/mwait.h |  8 ++++++++
+ arch/x86/kernel/process.c    | 12 ++++++++++--
+ arch/x86/lib/delay.c         | 10 ++++++++++
+ 3 files changed, 28 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
+index bda3c27f0da0..f15120ada161 100644
+--- a/arch/x86/include/asm/mwait.h
++++ b/arch/x86/include/asm/mwait.h
+@@ -5,6 +5,8 @@
+ #include <linux/sched/idle.h>
+ 
+ #include <asm/cpufeature.h>
++#include <asm/spec_ctrl.h>
++#include <asm/microcode.h>
+ 
+ #define MWAIT_SUBSTATE_MASK           0xf
+ #define MWAIT_CSTATE_MASK             0xf
+@@ -105,9 +107,15 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+                       mb();
+               }
+ 
++              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++                      native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++
+               __monitor((void *)&current_thread_info()->flags, 0, 0);
+               if (!need_resched())
+                       __mwait(eax, ecx);
++
++              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++                      native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
+       }
+       current_clr_polling();
+ }
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
+index 07e6218ad7d9..3adb3806a284 100644
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -447,11 +447,19 @@ static __cpuidle void mwait_idle(void)
+                       mb(); /* quirk */
+               }
+ 
++              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++                        native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++
+               __monitor((void *)&current_thread_info()->flags, 0, 0);
+-              if (!need_resched())
++              if (!need_resched()) {
+                       __sti_mwait(0, 0);
+-              else
++                      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++                              native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
++              } else {
++                      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++                              native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
+                       local_irq_enable();
++              }
+               trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
+       } else {
+               local_irq_enable();
+diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
+index cf2ac227c2ac..b088463973e4 100644
+--- a/arch/x86/lib/delay.c
++++ b/arch/x86/lib/delay.c
+@@ -26,6 +26,8 @@
+ # include <asm/smp.h>
+ #endif
+ 
++#define IBRS_DISABLE_THRESHOLD        1000
++
+ /* simple loop based delay: */
+ static void delay_loop(unsigned long loops)
+ {
+@@ -105,6 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
+       for (;;) {
+               delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
+ 
++              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
++                      (delay > IBRS_DISABLE_THRESHOLD))
++                      native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++
+               /*
+                * Use cpu_tss_rw as a cacheline-aligned, seldomly
+                * accessed per-cpu variable as the monitor target.
+@@ -118,6 +124,10 @@ static void delay_mwaitx(unsigned long __loops)
+                */
+               __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
+ 
++              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
++                      (delay > IBRS_DISABLE_THRESHOLD))
++                      native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
++
+               end = rdtsc_ordered();
+ 
+               if (loops <= end - start)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0278-x86-idle-Disable-IBRS-when-offlining-cpu-and-re-enab.patch b/patches/kernel/0278-x86-idle-Disable-IBRS-when-offlining-cpu-and-re-enab.patch

deleted file mode 100644 (file)

index 8424c28..0000000
--- a/patches/kernel/0278-x86-idle-Disable-IBRS-when-offlining-cpu-and-re-enab.patch
+++ /dev/null
@@ -1,54 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Wed, 15 Nov 2017 12:24:19 -0800
-Subject: [PATCH] x86/idle: Disable IBRS when offlining cpu and re-enable on
- wakeup
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Clear IBRS when cpu is offlined and set it when brining it back online.
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 9bcf662c1690880b2464fe99d0f58dce53c0d89f)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/smpboot.c | 7 +++++++
- 1 file changed, 7 insertions(+)
-
-diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
-index 398e8324fea4..a652bff7add4 100644
---- a/arch/x86/kernel/smpboot.c
-+++ b/arch/x86/kernel/smpboot.c
-@@ -77,6 +77,7 @@
- #include <asm/i8259.h>
- #include <asm/realmode.h>
- #include <asm/misc.h>
-+#include <asm/microcode.h>
- 
- /* Number of siblings per CPU package */
- int smp_num_siblings = 1;
-@@ -1692,9 +1693,15 @@ void native_play_dead(void)
-       play_dead_common();
-       tboot_shutdown(TB_SHUTDOWN_WFS);
- 
-+      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+              native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
-+
-       mwait_play_dead();      /* Only returns on failure */
-       if (cpuidle_play_dead())
-               hlt_play_dead();
-+
-+      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+              native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
- }
- 
- #else /* ... !CONFIG_HOTPLUG_CPU */
--- 
-2.14.2
-
diff --git a/patches/kernel/0279-x86-idle-Disable-IBRS-when-offlining-cpu-and-re-enab.patch b/patches/kernel/0279-x86-idle-Disable-IBRS-when-offlining-cpu-and-re-enab.patch

new file mode 100644 (file)

index 0000000..8424c28
--- /dev/null
+++ b/patches/kernel/0279-x86-idle-Disable-IBRS-when-offlining-cpu-and-re-enab.patch
@@ -0,0 +1,54 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Wed, 15 Nov 2017 12:24:19 -0800
+Subject: [PATCH] x86/idle: Disable IBRS when offlining cpu and re-enable on
+ wakeup
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Clear IBRS when cpu is offlined and set it when bringing it back online.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 9bcf662c1690880b2464fe99d0f58dce53c0d89f)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/smpboot.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index 398e8324fea4..a652bff7add4 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -77,6 +77,7 @@
+ #include <asm/i8259.h>
+ #include <asm/realmode.h>
+ #include <asm/misc.h>
++#include <asm/microcode.h>
+ 
+ /* Number of siblings per CPU package */
+ int smp_num_siblings = 1;
+@@ -1692,9 +1693,15 @@ void native_play_dead(void)
+       play_dead_common();
+       tboot_shutdown(TB_SHUTDOWN_WFS);
+ 
++      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++              native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++
+       mwait_play_dead();      /* Only returns on failure */
+       if (cpuidle_play_dead())
+               hlt_play_dead();
++
++      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++              native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
+ }
+ 
+ #else /* ... !CONFIG_HOTPLUG_CPU */
+-- 
+2.14.2
+
diff --git a/patches/kernel/0279-x86-mm-Set-IBPB-upon-context-switch.patch b/patches/kernel/0279-x86-mm-Set-IBPB-upon-context-switch.patch

deleted file mode 100644 (file)

index de5de85..0000000
--- a/patches/kernel/0279-x86-mm-Set-IBPB-upon-context-switch.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Fri, 20 Oct 2017 12:56:29 -0700
-Subject: [PATCH] x86/mm: Set IBPB upon context switch
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Set IBPB on context switch with changing of page table.
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit a3320203792b633fb96df5d0bbfb7036129b78e2)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/mm/tlb.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index 06f3854d0a4f..bb3ded3a4e5f 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -12,6 +12,7 @@
- #include <asm/cache.h>
- #include <asm/apic.h>
- #include <asm/uv/uv.h>
-+#include <asm/microcode.h>
- #include <linux/debugfs.h>
- 
- /*
-@@ -218,6 +219,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-               u16 new_asid;
-               bool need_flush;
- 
-+              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+                      native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
-+
-               if (IS_ENABLED(CONFIG_VMAP_STACK)) {
-                       /*
-                        * If our current stack is in vmalloc space and isn't
--- 
-2.14.2
-
diff --git a/patches/kernel/0280-x86-mm-Only-set-IBPB-when-the-new-thread-cannot-ptra.patch b/patches/kernel/0280-x86-mm-Only-set-IBPB-when-the-new-thread-cannot-ptra.patch

deleted file mode 100644 (file)

index f85c03f..0000000
--- a/patches/kernel/0280-x86-mm-Only-set-IBPB-when-the-new-thread-cannot-ptra.patch
+++ /dev/null
@@ -1,127 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Tue, 7 Nov 2017 13:52:42 -0800
-Subject: [PATCH] x86/mm: Only set IBPB when the new thread cannot ptrace
- current thread
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-To reduce overhead of setting IBPB, we only do that when
-the new thread cannot ptrace the current one.  If the new
-thread has ptrace capability on current thread, it is safe.
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 65941af723059ffeeca269b99ab51b3c9e320751)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- include/linux/ptrace.h |  6 ++++++
- arch/x86/mm/tlb.c      |  5 ++++-
- kernel/ptrace.c        | 18 ++++++++++++++----
- 3 files changed, 24 insertions(+), 5 deletions(-)
-
-diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
-index 0e5fcc11b1b8..d6afefd5465b 100644
---- a/include/linux/ptrace.h
-+++ b/include/linux/ptrace.h
-@@ -63,12 +63,15 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
- #define PTRACE_MODE_NOAUDIT   0x04
- #define PTRACE_MODE_FSCREDS 0x08
- #define PTRACE_MODE_REALCREDS 0x10
-+#define PTRACE_MODE_NOACCESS_CHK 0x20
- 
- /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
- #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
- #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
- #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
- #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)
-+#define PTRACE_MODE_IBPB (PTRACE_MODE_ATTACH | PTRACE_MODE_NOAUDIT \
-+                      | PTRACE_MODE_NOACCESS_CHK | PTRACE_MODE_REALCREDS)
- 
- /**
-  * ptrace_may_access - check whether the caller is permitted to access
-@@ -86,6 +89,9 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
-  */
- extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
- 
-+extern int ___ptrace_may_access(struct task_struct *cur, struct task_struct *task,
-+      unsigned int mode);
-+
- static inline int ptrace_reparented(struct task_struct *child)
- {
-       return !same_thread_group(child->real_parent, child->parent);
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index bb3ded3a4e5f..301e6efbc514 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -6,6 +6,7 @@
- #include <linux/interrupt.h>
- #include <linux/export.h>
- #include <linux/cpu.h>
-+#include <linux/ptrace.h>
- 
- #include <asm/tlbflush.h>
- #include <asm/mmu_context.h>
-@@ -219,7 +220,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-               u16 new_asid;
-               bool need_flush;
- 
--              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+              /* Null tsk means switching to kernel, so that's safe */
-+              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) && tsk &&
-+                      ___ptrace_may_access(tsk, current, PTRACE_MODE_IBPB))
-                       native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
- 
-               if (IS_ENABLED(CONFIG_VMAP_STACK)) {
-diff --git a/kernel/ptrace.c b/kernel/ptrace.c
-index 60f356d91060..f2f0f1aeabaf 100644
---- a/kernel/ptrace.c
-+++ b/kernel/ptrace.c
-@@ -268,9 +268,10 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
- }
- 
- /* Returns 0 on success, -errno on denial. */
--static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
-+int ___ptrace_may_access(struct task_struct *cur, struct task_struct *task,
-+              unsigned int mode)
- {
--      const struct cred *cred = current_cred(), *tcred;
-+      const struct cred *cred = __task_cred(cur), *tcred;
-       struct mm_struct *mm;
-       kuid_t caller_uid;
-       kgid_t caller_gid;
-@@ -290,7 +291,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
-        */
- 
-       /* Don't let security modules deny introspection */
--      if (same_thread_group(task, current))
-+      if (same_thread_group(task, cur))
-               return 0;
-       rcu_read_lock();
-       if (mode & PTRACE_MODE_FSCREDS) {
-@@ -328,7 +329,16 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
-            !ptrace_has_cap(mm->user_ns, mode)))
-           return -EPERM;
- 
--      return security_ptrace_access_check(task, mode);
-+      if (!(mode & PTRACE_MODE_NOACCESS_CHK))
-+              return security_ptrace_access_check(task, mode);
-+
-+      return 0;
-+}
-+EXPORT_SYMBOL_GPL(___ptrace_may_access);
-+
-+static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
-+{
-+      return ___ptrace_may_access(current, task, mode);
- }
- 
- bool ptrace_may_access(struct task_struct *task, unsigned int mode)
--- 
-2.14.2
-
diff --git a/patches/kernel/0280-x86-mm-Set-IBPB-upon-context-switch.patch b/patches/kernel/0280-x86-mm-Set-IBPB-upon-context-switch.patch

new file mode 100644 (file)

index 0000000..de5de85
--- /dev/null
+++ b/patches/kernel/0280-x86-mm-Set-IBPB-upon-context-switch.patch
@@ -0,0 +1,47 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Fri, 20 Oct 2017 12:56:29 -0700
+Subject: [PATCH] x86/mm: Set IBPB upon context switch
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Set IBPB on context switch with changing of page table.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit a3320203792b633fb96df5d0bbfb7036129b78e2)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/mm/tlb.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 06f3854d0a4f..bb3ded3a4e5f 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -12,6 +12,7 @@
+ #include <asm/cache.h>
+ #include <asm/apic.h>
+ #include <asm/uv/uv.h>
++#include <asm/microcode.h>
+ #include <linux/debugfs.h>
+ 
+ /*
+@@ -218,6 +219,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+               u16 new_asid;
+               bool need_flush;
+ 
++              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++                      native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
++
+               if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+                       /*
+                        * If our current stack is in vmalloc space and isn't
+-- 
+2.14.2
+
diff --git a/patches/kernel/0281-x86-entry-Stuff-RSB-for-entry-to-kernel-for-non-SMEP.patch b/patches/kernel/0281-x86-entry-Stuff-RSB-for-entry-to-kernel-for-non-SMEP.patch

deleted file mode 100644 (file)

index 0eebfdf..0000000
--- a/patches/kernel/0281-x86-entry-Stuff-RSB-for-entry-to-kernel-for-non-SMEP.patch
+++ /dev/null
@@ -1,202 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Tue, 14 Nov 2017 17:16:30 -0800
-Subject: [PATCH] x86/entry: Stuff RSB for entry to kernel for non-SMEP
- platform
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Stuff RSB to prevent RSB underflow on non-SMEP platform.
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit b82785ac1d33ce219c77d72b7bd80a21e1441ac8)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/spec_ctrl.h | 71 ++++++++++++++++++++++++++++++++++++++++
- arch/x86/entry/entry_64.S        | 18 ++++++++--
- arch/x86/entry/entry_64_compat.S |  4 +++
- 3 files changed, 91 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
-index 7f8bb09b6acb..55ee1f36bda2 100644
---- a/arch/x86/include/asm/spec_ctrl.h
-+++ b/arch/x86/include/asm/spec_ctrl.h
-@@ -35,6 +35,73 @@
-       popq %rdx;                              \
-       popq %rcx;                              \
-       popq %rax
-+#define __ASM_STUFF_RSB                               \
-+      call    1f;                             \
-+      pause;                                  \
-+1:    call    2f;                             \
-+      pause;                                  \
-+2:    call    3f;                             \
-+      pause;                                  \
-+3:    call    4f;                             \
-+      pause;                                  \
-+4:    call    5f;                             \
-+      pause;                                  \
-+5:    call    6f;                             \
-+      pause;                                  \
-+6:    call    7f;                             \
-+      pause;                                  \
-+7:    call    8f;                             \
-+      pause;                                  \
-+8:    call    9f;                             \
-+      pause;                                  \
-+9:    call    10f;                            \
-+      pause;                                  \
-+10:   call    11f;                            \
-+      pause;                                  \
-+11:   call    12f;                            \
-+      pause;                                  \
-+12:   call    13f;                            \
-+      pause;                                  \
-+13:   call    14f;                            \
-+      pause;                                  \
-+14:   call    15f;                            \
-+      pause;                                  \
-+15:   call    16f;                            \
-+      pause;                                  \
-+16:   call    17f;                            \
-+      pause;                                  \
-+17:   call    18f;                            \
-+      pause;                                  \
-+18:   call    19f;                            \
-+      pause;                                  \
-+19:   call    20f;                            \
-+      pause;                                  \
-+20:   call    21f;                            \
-+      pause;                                  \
-+21:   call    22f;                            \
-+      pause;                                  \
-+22:   call    23f;                            \
-+      pause;                                  \
-+23:   call    24f;                            \
-+      pause;                                  \
-+24:   call    25f;                            \
-+      pause;                                  \
-+25:   call    26f;                            \
-+      pause;                                  \
-+26:   call    27f;                            \
-+      pause;                                  \
-+27:   call    28f;                            \
-+      pause;                                  \
-+28:   call    29f;                            \
-+      pause;                                  \
-+29:   call    30f;                            \
-+      pause;                                  \
-+30:   call    31f;                            \
-+      pause;                                  \
-+31:   call    32f;                            \
-+      pause;                                  \
-+32:                                           \
-+      add $(32*8), %rsp;
- 
- .macro ENABLE_IBRS
- ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS), X86_FEATURE_SPEC_CTRL
-@@ -48,5 +115,9 @@ ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS_CLOBBER), X86_FEATURE_SPEC_CTRL
- ALTERNATIVE "", __stringify(__ASM_DISABLE_IBRS), X86_FEATURE_SPEC_CTRL
- .endm
- 
-+.macro STUFF_RSB
-+ALTERNATIVE __stringify(__ASM_STUFF_RSB), "", X86_FEATURE_SMEP
-+.endm
-+
- #endif /* __ASSEMBLY__ */
- #endif /* _ASM_X86_SPEC_CTRL_H */
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 5f898c3c1dad..f6ec4ad5b114 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -214,8 +214,6 @@ ENTRY(entry_SYSCALL_64)
-       movq    %rsp, PER_CPU_VAR(rsp_scratch)
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- 
--      TRACE_IRQS_OFF
--
-       /* Construct struct pt_regs on stack */
-       pushq   $__USER_DS                      /* pt_regs->ss */
-       pushq   PER_CPU_VAR(rsp_scratch)        /* pt_regs->sp */
-@@ -238,6 +236,10 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
- 
-       ENABLE_IBRS
- 
-+      STUFF_RSB
-+
-+      TRACE_IRQS_OFF
-+
-       /*
-        * If we need to do entry work or if we guess we'll need to do
-        * exit work, go straight to the slow path.
-@@ -658,6 +660,13 @@ END(irq_entries_start)
-       ALLOC_PT_GPREGS_ON_STACK
-       SAVE_C_REGS
-       SAVE_EXTRA_REGS
-+
-+      /*
-+       * Have to do stuffing before encoding frame pointer.
-+       * Could add some unnecessary RSB clearing if coming
-+       * from kernel for non-SMEP platform.
-+       */
-+      STUFF_RSB
-       ENCODE_FRAME_POINTER
- 
-       testb   $3, CS(%rsp)
-@@ -1276,6 +1285,10 @@ ENTRY(paranoid_entry)
-       cld
-       SAVE_C_REGS 8
-       SAVE_EXTRA_REGS 8
-+      /*
-+       * Do the stuffing unconditionally from user/kernel to be safe
-+       */
-+      STUFF_RSB
-       ENCODE_FRAME_POINTER 8
-       movl    $1, %ebx
-       movl    $MSR_GS_BASE, %ecx
-@@ -1329,6 +1342,7 @@ ENTRY(error_entry)
-       cld
-       SAVE_C_REGS 8
-       SAVE_EXTRA_REGS 8
-+      STUFF_RSB
-       ENCODE_FRAME_POINTER 8
-       xorl    %ebx, %ebx
-       testb   $3, CS+8(%rsp)
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
-index ee4f3edb3c50..1480222bae02 100644
---- a/arch/x86/entry/entry_64_compat.S
-+++ b/arch/x86/entry/entry_64_compat.S
-@@ -97,6 +97,7 @@ ENTRY(entry_SYSENTER_compat)
-       cld
- 
-       ENABLE_IBRS
-+      STUFF_RSB
- 
-       /*
-        * SYSENTER doesn't filter flags, so we need to clear NT and AC
-@@ -227,6 +228,8 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
-       pushq   $0                      /* pt_regs->r14 = 0 */
-       pushq   $0                      /* pt_regs->r15 = 0 */
- 
-+      STUFF_RSB
-+
-       /*
-        * User mode is traced as though IRQs are on, and SYSENTER
-        * turned them off.
-@@ -354,6 +357,7 @@ ENTRY(entry_INT80_compat)
-       cld
- 
-       ENABLE_IBRS
-+      STUFF_RSB
- 
-       /*
-        * User mode is traced as though IRQs are on, and the interrupt
--- 
-2.14.2
-
diff --git a/patches/kernel/0281-x86-mm-Only-set-IBPB-when-the-new-thread-cannot-ptra.patch b/patches/kernel/0281-x86-mm-Only-set-IBPB-when-the-new-thread-cannot-ptra.patch

new file mode 100644 (file)

index 0000000..f85c03f
--- /dev/null
+++ b/patches/kernel/0281-x86-mm-Only-set-IBPB-when-the-new-thread-cannot-ptra.patch
@@ -0,0 +1,127 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Tue, 7 Nov 2017 13:52:42 -0800
+Subject: [PATCH] x86/mm: Only set IBPB when the new thread cannot ptrace
+ current thread
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+To reduce overhead of setting IBPB, we only do that when
+the new thread cannot ptrace the current one.  If the new
+thread has ptrace capability on current thread, it is safe.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 65941af723059ffeeca269b99ab51b3c9e320751)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ include/linux/ptrace.h |  6 ++++++
+ arch/x86/mm/tlb.c      |  5 ++++-
+ kernel/ptrace.c        | 18 ++++++++++++++----
+ 3 files changed, 24 insertions(+), 5 deletions(-)
+
+diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
+index 0e5fcc11b1b8..d6afefd5465b 100644
+--- a/include/linux/ptrace.h
++++ b/include/linux/ptrace.h
+@@ -63,12 +63,15 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
+ #define PTRACE_MODE_NOAUDIT   0x04
+ #define PTRACE_MODE_FSCREDS 0x08
+ #define PTRACE_MODE_REALCREDS 0x10
++#define PTRACE_MODE_NOACCESS_CHK 0x20
+ 
+ /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
+ #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
+ #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
+ #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
+ #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)
++#define PTRACE_MODE_IBPB (PTRACE_MODE_ATTACH | PTRACE_MODE_NOAUDIT \
++                      | PTRACE_MODE_NOACCESS_CHK | PTRACE_MODE_REALCREDS)
+ 
+ /**
+  * ptrace_may_access - check whether the caller is permitted to access
+@@ -86,6 +89,9 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
+  */
+ extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
+ 
++extern int ___ptrace_may_access(struct task_struct *cur, struct task_struct *task,
++      unsigned int mode);
++
+ static inline int ptrace_reparented(struct task_struct *child)
+ {
+       return !same_thread_group(child->real_parent, child->parent);
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index bb3ded3a4e5f..301e6efbc514 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -6,6 +6,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/export.h>
+ #include <linux/cpu.h>
++#include <linux/ptrace.h>
+ 
+ #include <asm/tlbflush.h>
+ #include <asm/mmu_context.h>
+@@ -219,7 +220,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+               u16 new_asid;
+               bool need_flush;
+ 
+-              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++              /* Null tsk means switching to kernel, so that's safe */
++              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) && tsk &&
++                      ___ptrace_may_access(tsk, current, PTRACE_MODE_IBPB))
+                       native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+ 
+               if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+diff --git a/kernel/ptrace.c b/kernel/ptrace.c
+index 60f356d91060..f2f0f1aeabaf 100644
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -268,9 +268,10 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
+ }
+ 
+ /* Returns 0 on success, -errno on denial. */
+-static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
++int ___ptrace_may_access(struct task_struct *cur, struct task_struct *task,
++              unsigned int mode)
+ {
+-      const struct cred *cred = current_cred(), *tcred;
++      const struct cred *cred = __task_cred(cur), *tcred;
+       struct mm_struct *mm;
+       kuid_t caller_uid;
+       kgid_t caller_gid;
+@@ -290,7 +291,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
+        */
+ 
+       /* Don't let security modules deny introspection */
+-      if (same_thread_group(task, current))
++      if (same_thread_group(task, cur))
+               return 0;
+       rcu_read_lock();
+       if (mode & PTRACE_MODE_FSCREDS) {
+@@ -328,7 +329,16 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
+            !ptrace_has_cap(mm->user_ns, mode)))
+           return -EPERM;
+ 
+-      return security_ptrace_access_check(task, mode);
++      if (!(mode & PTRACE_MODE_NOACCESS_CHK))
++              return security_ptrace_access_check(task, mode);
++
++      return 0;
++}
++EXPORT_SYMBOL_GPL(___ptrace_may_access);
++
++static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
++{
++      return ___ptrace_may_access(current, task, mode);
+ }
+ 
+ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0282-x86-entry-Stuff-RSB-for-entry-to-kernel-for-non-SMEP.patch b/patches/kernel/0282-x86-entry-Stuff-RSB-for-entry-to-kernel-for-non-SMEP.patch

new file mode 100644 (file)

index 0000000..0eebfdf
--- /dev/null
+++ b/patches/kernel/0282-x86-entry-Stuff-RSB-for-entry-to-kernel-for-non-SMEP.patch
@@ -0,0 +1,202 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Tue, 14 Nov 2017 17:16:30 -0800
+Subject: [PATCH] x86/entry: Stuff RSB for entry to kernel for non-SMEP
+ platform
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Stuff RSB to prevent RSB underflow on non-SMEP platform.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit b82785ac1d33ce219c77d72b7bd80a21e1441ac8)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/spec_ctrl.h | 71 ++++++++++++++++++++++++++++++++++++++++
+ arch/x86/entry/entry_64.S        | 18 ++++++++--
+ arch/x86/entry/entry_64_compat.S |  4 +++
+ 3 files changed, 91 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
+index 7f8bb09b6acb..55ee1f36bda2 100644
+--- a/arch/x86/include/asm/spec_ctrl.h
++++ b/arch/x86/include/asm/spec_ctrl.h
+@@ -35,6 +35,73 @@
+       popq %rdx;                              \
+       popq %rcx;                              \
+       popq %rax
++#define __ASM_STUFF_RSB                               \
++      call    1f;                             \
++      pause;                                  \
++1:    call    2f;                             \
++      pause;                                  \
++2:    call    3f;                             \
++      pause;                                  \
++3:    call    4f;                             \
++      pause;                                  \
++4:    call    5f;                             \
++      pause;                                  \
++5:    call    6f;                             \
++      pause;                                  \
++6:    call    7f;                             \
++      pause;                                  \
++7:    call    8f;                             \
++      pause;                                  \
++8:    call    9f;                             \
++      pause;                                  \
++9:    call    10f;                            \
++      pause;                                  \
++10:   call    11f;                            \
++      pause;                                  \
++11:   call    12f;                            \
++      pause;                                  \
++12:   call    13f;                            \
++      pause;                                  \
++13:   call    14f;                            \
++      pause;                                  \
++14:   call    15f;                            \
++      pause;                                  \
++15:   call    16f;                            \
++      pause;                                  \
++16:   call    17f;                            \
++      pause;                                  \
++17:   call    18f;                            \
++      pause;                                  \
++18:   call    19f;                            \
++      pause;                                  \
++19:   call    20f;                            \
++      pause;                                  \
++20:   call    21f;                            \
++      pause;                                  \
++21:   call    22f;                            \
++      pause;                                  \
++22:   call    23f;                            \
++      pause;                                  \
++23:   call    24f;                            \
++      pause;                                  \
++24:   call    25f;                            \
++      pause;                                  \
++25:   call    26f;                            \
++      pause;                                  \
++26:   call    27f;                            \
++      pause;                                  \
++27:   call    28f;                            \
++      pause;                                  \
++28:   call    29f;                            \
++      pause;                                  \
++29:   call    30f;                            \
++      pause;                                  \
++30:   call    31f;                            \
++      pause;                                  \
++31:   call    32f;                            \
++      pause;                                  \
++32:                                           \
++      add $(32*8), %rsp;
+ 
+ .macro ENABLE_IBRS
+ ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS), X86_FEATURE_SPEC_CTRL
+@@ -48,5 +115,9 @@ ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS_CLOBBER), X86_FEATURE_SPEC_CTRL
+ ALTERNATIVE "", __stringify(__ASM_DISABLE_IBRS), X86_FEATURE_SPEC_CTRL
+ .endm
+ 
++.macro STUFF_RSB
++ALTERNATIVE __stringify(__ASM_STUFF_RSB), "", X86_FEATURE_SMEP
++.endm
++
+ #endif /* __ASSEMBLY__ */
+ #endif /* _ASM_X86_SPEC_CTRL_H */
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 5f898c3c1dad..f6ec4ad5b114 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -214,8 +214,6 @@ ENTRY(entry_SYSCALL_64)
+       movq    %rsp, PER_CPU_VAR(rsp_scratch)
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ 
+-      TRACE_IRQS_OFF
+-
+       /* Construct struct pt_regs on stack */
+       pushq   $__USER_DS                      /* pt_regs->ss */
+       pushq   PER_CPU_VAR(rsp_scratch)        /* pt_regs->sp */
+@@ -238,6 +236,10 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
+ 
+       ENABLE_IBRS
+ 
++      STUFF_RSB
++
++      TRACE_IRQS_OFF
++
+       /*
+        * If we need to do entry work or if we guess we'll need to do
+        * exit work, go straight to the slow path.
+@@ -658,6 +660,13 @@ END(irq_entries_start)
+       ALLOC_PT_GPREGS_ON_STACK
+       SAVE_C_REGS
+       SAVE_EXTRA_REGS
++
++      /*
++       * Have to do stuffing before encoding frame pointer.
++       * Could add some unnecessary RSB clearing if coming
++       * from kernel for non-SMEP platform.
++       */
++      STUFF_RSB
+       ENCODE_FRAME_POINTER
+ 
+       testb   $3, CS(%rsp)
+@@ -1276,6 +1285,10 @@ ENTRY(paranoid_entry)
+       cld
+       SAVE_C_REGS 8
+       SAVE_EXTRA_REGS 8
++      /*
++       * Do the stuffing unconditionally from user/kernel to be safe
++       */
++      STUFF_RSB
+       ENCODE_FRAME_POINTER 8
+       movl    $1, %ebx
+       movl    $MSR_GS_BASE, %ecx
+@@ -1329,6 +1342,7 @@ ENTRY(error_entry)
+       cld
+       SAVE_C_REGS 8
+       SAVE_EXTRA_REGS 8
++      STUFF_RSB
+       ENCODE_FRAME_POINTER 8
+       xorl    %ebx, %ebx
+       testb   $3, CS+8(%rsp)
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index ee4f3edb3c50..1480222bae02 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -97,6 +97,7 @@ ENTRY(entry_SYSENTER_compat)
+       cld
+ 
+       ENABLE_IBRS
++      STUFF_RSB
+ 
+       /*
+        * SYSENTER doesn't filter flags, so we need to clear NT and AC
+@@ -227,6 +228,8 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
+       pushq   $0                      /* pt_regs->r14 = 0 */
+       pushq   $0                      /* pt_regs->r15 = 0 */
+ 
++      STUFF_RSB
++
+       /*
+        * User mode is traced as though IRQs are on, and SYSENTER
+        * turned them off.
+@@ -354,6 +357,7 @@ ENTRY(entry_INT80_compat)
+       cld
+ 
+       ENABLE_IBRS
++      STUFF_RSB
+ 
+       /*
+        * User mode is traced as though IRQs are on, and the interrupt
+-- 
+2.14.2
+
diff --git a/patches/kernel/0282-x86-kvm-add-MSR_IA32_SPEC_CTRL-and-MSR_IA32_PRED_CMD.patch b/patches/kernel/0282-x86-kvm-add-MSR_IA32_SPEC_CTRL-and-MSR_IA32_PRED_CMD.patch

deleted file mode 100644 (file)

index e285492..0000000
--- a/patches/kernel/0282-x86-kvm-add-MSR_IA32_SPEC_CTRL-and-MSR_IA32_PRED_CMD.patch
+++ /dev/null
@@ -1,103 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Wei Wang <wei.w.wang@intel.com>
-Date: Tue, 7 Nov 2017 16:47:53 +0800
-Subject: [PATCH] x86/kvm: add MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD to kvm
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Add field to access guest MSR_IA332_SPEC_CTRL and MSR_IA32_PRED_CMD state.
-
-Signed-off-by: Wei Wang <wei.w.wang@intel.com>
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 15eb187f47ee2be44d34313bc89cfb719d82cb21)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/kvm_host.h |  2 ++
- arch/x86/kvm/vmx.c              | 10 ++++++++++
- arch/x86/kvm/x86.c              |  2 +-
- 3 files changed, 13 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
-index b69af3df978a..1953c0a5b972 100644
---- a/arch/x86/include/asm/kvm_host.h
-+++ b/arch/x86/include/asm/kvm_host.h
-@@ -628,6 +628,8 @@ struct kvm_vcpu_arch {
-       u64 mcg_ext_ctl;
-       u64 *mce_banks;
- 
-+      u64 spec_ctrl;
-+
-       /* Cache MMIO info */
-       u64 mmio_gva;
-       unsigned access;
-diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
-index 9b4256fd589a..daff9962c90a 100644
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -50,6 +50,7 @@
- #include <asm/apic.h>
- #include <asm/irq_remapping.h>
- #include <asm/mmu_context.h>
-+#include <asm/microcode.h>
- 
- #include "trace.h"
- #include "pmu.h"
-@@ -3247,6 +3248,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
-       case MSR_IA32_TSC:
-               msr_info->data = guest_read_tsc(vcpu);
-               break;
-+      case MSR_IA32_SPEC_CTRL:
-+              msr_info->data = vcpu->arch.spec_ctrl;
-+              break;
-       case MSR_IA32_SYSENTER_CS:
-               msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
-               break;
-@@ -3351,6 +3355,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
-       case MSR_IA32_TSC:
-               kvm_write_tsc(vcpu, msr_info);
-               break;
-+      case MSR_IA32_SPEC_CTRL:
-+              vcpu->arch.spec_ctrl = msr_info->data;
-+              break;
-       case MSR_IA32_CR_PAT:
-               if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
-                       if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
-@@ -6146,6 +6153,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
- 
-       msr_info.index = ecx;
-       msr_info.host_initiated = false;
-+
-       if (vmx_get_msr(vcpu, &msr_info)) {
-               trace_kvm_msr_read_ex(ecx);
-               kvm_inject_gp(vcpu, 0);
-@@ -6699,6 +6707,8 @@ static __init int hardware_setup(void)
-       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-+      vmx_disable_intercept_for_msr(MSR_IA32_SPEC_CTRL, false);
-+      vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
- 
-       memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
-                       vmx_msr_bitmap_legacy, PAGE_SIZE);
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index 703cd4171921..eae4aecf3cfe 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -983,7 +983,7 @@ static u32 msrs_to_save[] = {
-       MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
- #endif
-       MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
--      MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
-+      MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, MSR_IA32_SPEC_CTRL,
- };
- 
- static unsigned num_msrs_to_save;
--- 
-2.14.2
-
diff --git a/patches/kernel/0283-x86-kvm-Set-IBPB-when-switching-VM.patch b/patches/kernel/0283-x86-kvm-Set-IBPB-when-switching-VM.patch

deleted file mode 100644 (file)

index 171ed40..0000000
--- a/patches/kernel/0283-x86-kvm-Set-IBPB-when-switching-VM.patch
+++ /dev/null
@@ -1,46 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Fri, 13 Oct 2017 14:31:46 -0700
-Subject: [PATCH] x86/kvm: Set IBPB when switching VM
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Set IBPB (Indirect branch prediction barrier) when switching VM.
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 665076ad780e8620505c742cfcb4b0f3fb99324a)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kvm/vmx.c | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
-index daff9962c90a..8df195bbb41d 100644
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -1488,6 +1488,7 @@ static void vmcs_load(struct vmcs *vmcs)
-       if (error)
-               printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
-                      vmcs, phys_addr);
-+
- }
- 
- #ifdef CONFIG_KEXEC_CORE
-@@ -2268,6 +2269,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-       if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
-               per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
-               vmcs_load(vmx->loaded_vmcs->vmcs);
-+              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+                      native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
-       }
- 
-       if (!already_loaded) {
--- 
-2.14.2
-
diff --git a/patches/kernel/0283-x86-kvm-add-MSR_IA32_SPEC_CTRL-and-MSR_IA32_PRED_CMD.patch b/patches/kernel/0283-x86-kvm-add-MSR_IA32_SPEC_CTRL-and-MSR_IA32_PRED_CMD.patch

new file mode 100644 (file)

index 0000000..e285492
--- /dev/null
+++ b/patches/kernel/0283-x86-kvm-add-MSR_IA32_SPEC_CTRL-and-MSR_IA32_PRED_CMD.patch
@@ -0,0 +1,103 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Wei Wang <wei.w.wang@intel.com>
+Date: Tue, 7 Nov 2017 16:47:53 +0800
+Subject: [PATCH] x86/kvm: add MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD to kvm
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Add field to access guest MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD state.
+
+Signed-off-by: Wei Wang <wei.w.wang@intel.com>
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 15eb187f47ee2be44d34313bc89cfb719d82cb21)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/kvm_host.h |  2 ++
+ arch/x86/kvm/vmx.c              | 10 ++++++++++
+ arch/x86/kvm/x86.c              |  2 +-
+ 3 files changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index b69af3df978a..1953c0a5b972 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -628,6 +628,8 @@ struct kvm_vcpu_arch {
+       u64 mcg_ext_ctl;
+       u64 *mce_banks;
+ 
++      u64 spec_ctrl;
++
+       /* Cache MMIO info */
+       u64 mmio_gva;
+       unsigned access;
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 9b4256fd589a..daff9962c90a 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -50,6 +50,7 @@
+ #include <asm/apic.h>
+ #include <asm/irq_remapping.h>
+ #include <asm/mmu_context.h>
++#include <asm/microcode.h>
+ 
+ #include "trace.h"
+ #include "pmu.h"
+@@ -3247,6 +3248,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+       case MSR_IA32_TSC:
+               msr_info->data = guest_read_tsc(vcpu);
+               break;
++      case MSR_IA32_SPEC_CTRL:
++              msr_info->data = vcpu->arch.spec_ctrl;
++              break;
+       case MSR_IA32_SYSENTER_CS:
+               msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
+               break;
+@@ -3351,6 +3355,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+       case MSR_IA32_TSC:
+               kvm_write_tsc(vcpu, msr_info);
+               break;
++      case MSR_IA32_SPEC_CTRL:
++              vcpu->arch.spec_ctrl = msr_info->data;
++              break;
+       case MSR_IA32_CR_PAT:
+               if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+                       if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+@@ -6146,6 +6153,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
+ 
+       msr_info.index = ecx;
+       msr_info.host_initiated = false;
++
+       if (vmx_get_msr(vcpu, &msr_info)) {
+               trace_kvm_msr_read_ex(ecx);
+               kvm_inject_gp(vcpu, 0);
+@@ -6699,6 +6707,8 @@ static __init int hardware_setup(void)
+       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
++      vmx_disable_intercept_for_msr(MSR_IA32_SPEC_CTRL, false);
++      vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
+ 
+       memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
+                       vmx_msr_bitmap_legacy, PAGE_SIZE);
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 703cd4171921..eae4aecf3cfe 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -983,7 +983,7 @@ static u32 msrs_to_save[] = {
+       MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
+ #endif
+       MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
+-      MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
++      MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, MSR_IA32_SPEC_CTRL,
+ };
+ 
+ static unsigned num_msrs_to_save;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0284-x86-kvm-Set-IBPB-when-switching-VM.patch b/patches/kernel/0284-x86-kvm-Set-IBPB-when-switching-VM.patch

new file mode 100644 (file)

index 0000000..171ed40
--- /dev/null
+++ b/patches/kernel/0284-x86-kvm-Set-IBPB-when-switching-VM.patch
@@ -0,0 +1,46 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Fri, 13 Oct 2017 14:31:46 -0700
+Subject: [PATCH] x86/kvm: Set IBPB when switching VM
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Set IBPB (indirect branch prediction barrier) when switching VMs.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 665076ad780e8620505c742cfcb4b0f3fb99324a)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/vmx.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index daff9962c90a..8df195bbb41d 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -1488,6 +1488,7 @@ static void vmcs_load(struct vmcs *vmcs)
+       if (error)
+               printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
+                      vmcs, phys_addr);
++
+ }
+ 
+ #ifdef CONFIG_KEXEC_CORE
+@@ -2268,6 +2269,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+       if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
+               per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
+               vmcs_load(vmx->loaded_vmcs->vmcs);
++              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++                      native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+       }
+ 
+       if (!already_loaded) {
+-- 
+2.14.2
+
diff --git a/patches/kernel/0284-x86-kvm-Toggle-IBRS-on-VM-entry-and-exit.patch b/patches/kernel/0284-x86-kvm-Toggle-IBRS-on-VM-entry-and-exit.patch

deleted file mode 100644 (file)

index 5fae670..0000000
--- a/patches/kernel/0284-x86-kvm-Toggle-IBRS-on-VM-entry-and-exit.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Fri, 20 Oct 2017 17:04:35 -0700
-Subject: [PATCH] x86/kvm: Toggle IBRS on VM entry and exit
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Restore guest IBRS on VM entry and set it to 1 on VM exit
-back to kernel.
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 3dc28210342f174270bcefac74ef5d0b52ffd846)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kvm/vmx.c | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
-index 8df195bbb41d..57d538fc7c75 100644
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -9101,6 +9101,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
-               __write_pkru(vcpu->arch.pkru);
- 
-       atomic_switch_perf_msrs(vmx);
-+
-+      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+              add_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL,
-+                      vcpu->arch.spec_ctrl, FEATURE_ENABLE_IBRS);
-+
-       debugctlmsr = get_debugctlmsr();
- 
-       vmx_arm_hv_timer(vcpu);
--- 
-2.14.2
-
diff --git a/patches/kernel/0285-x86-kvm-Pad-RSB-on-VM-transition.patch b/patches/kernel/0285-x86-kvm-Pad-RSB-on-VM-transition.patch

deleted file mode 100644 (file)

index f337f15..0000000
--- a/patches/kernel/0285-x86-kvm-Pad-RSB-on-VM-transition.patch
+++ /dev/null
@@ -1,154 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Fri, 20 Oct 2017 17:05:54 -0700
-Subject: [PATCH] x86/kvm: Pad RSB on VM transition
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Add code to pad the local CPU's RSB entries to protect
-from previous less privilege mode.
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 5369368d3520addb2ffb2413cfa7e8f3efe2e31d)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/kvm_host.h | 103 ++++++++++++++++++++++++++++++++++++++++
- arch/x86/kvm/vmx.c              |   2 +
- 2 files changed, 105 insertions(+)
-
-diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
-index 1953c0a5b972..4117a97228a2 100644
---- a/arch/x86/include/asm/kvm_host.h
-+++ b/arch/x86/include/asm/kvm_host.h
-@@ -125,6 +125,109 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
- 
- #define ASYNC_PF_PER_VCPU 64
- 
-+static inline void stuff_RSB(void)
-+{
-+        __asm__ __volatile__("  \n\
-+      call .label1    \n\
-+      pause     \n\
-+.label1:        \n\
-+      call .label2    \n\
-+      pause     \n\
-+.label2:        \n\
-+      call .label3    \n\
-+      pause     \n\
-+.label3:        \n\
-+      call .label4    \n\
-+      pause     \n\
-+.label4:        \n\
-+      call .label5    \n\
-+      pause     \n\
-+.label5:        \n\
-+      call .label6    \n\
-+      pause     \n\
-+.label6:        \n\
-+      call .label7    \n\
-+      pause     \n\
-+.label7:        \n\
-+      call .label8    \n\
-+      pause     \n\
-+.label8:        \n\
-+      call .label9    \n\
-+      pause     \n\
-+.label9:        \n\
-+      call .label10   \n\
-+      pause     \n\
-+.label10:       \n\
-+      call .label11   \n\
-+      pause     \n\
-+.label11:       \n\
-+      call .label12   \n\
-+      pause     \n\
-+.label12:       \n\
-+      call .label13   \n\
-+      pause     \n\
-+.label13:       \n\
-+      call .label14   \n\
-+      pause     \n\
-+.label14:       \n\
-+      call .label15   \n\
-+      pause     \n\
-+.label15:       \n\
-+      call .label16   \n\
-+      pause     \n\
-+.label16:     \n\
-+      call .label17   \n\
-+      pause   \n\
-+.label17:     \n\
-+      call .label18   \n\
-+      pause   \n\
-+.label18:     \n\
-+      call .label19   \n\
-+      pause   \n\
-+.label19:     \n\
-+      call .label20   \n\
-+      pause   \n\
-+.label20:     \n\
-+      call .label21   \n\
-+      pause   \n\
-+.label21:     \n\
-+      call .label22   \n\
-+      pause   \n\
-+.label22:     \n\
-+      call .label23   \n\
-+      pause   \n\
-+.label23:     \n\
-+      call .label24   \n\
-+      pause   \n\
-+.label24:     \n\
-+      call .label25   \n\
-+      pause   \n\
-+.label25:     \n\
-+      call .label26   \n\
-+      pause   \n\
-+.label26:     \n\
-+      call .label27   \n\
-+      pause   \n\
-+.label27:     \n\
-+      call .label28   \n\
-+      pause   \n\
-+.label28:     \n\
-+      call .label29   \n\
-+      pause   \n\
-+.label29:     \n\
-+      call .label30   \n\
-+      pause   \n\
-+.label30:     \n\
-+      call .label31   \n\
-+      pause   \n\
-+.label31:     \n\
-+      call .label32   \n\
-+      pause   \n\
-+.label32: \n\
-+      add $(32*8), %%rsp      \n\
-+": : :"memory");
-+}
-+
- enum kvm_reg {
-       VCPU_REGS_RAX = 0,
-       VCPU_REGS_RCX = 1,
-diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
-index 57d538fc7c75..496884b6467f 100644
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -9228,6 +9228,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
- #endif
-             );
- 
-+      stuff_RSB();
-+
-       /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
-       if (debugctlmsr)
-               update_debugctlmsr(debugctlmsr);
--- 
-2.14.2
-
diff --git a/patches/kernel/0285-x86-kvm-Toggle-IBRS-on-VM-entry-and-exit.patch b/patches/kernel/0285-x86-kvm-Toggle-IBRS-on-VM-entry-and-exit.patch

new file mode 100644 (file)

index 0000000..5fae670
--- /dev/null
+++ b/patches/kernel/0285-x86-kvm-Toggle-IBRS-on-VM-entry-and-exit.patch
@@ -0,0 +1,42 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Fri, 20 Oct 2017 17:04:35 -0700
+Subject: [PATCH] x86/kvm: Toggle IBRS on VM entry and exit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Restore the guest's IBRS value on VM entry and set IBRS to 1 on VM exit
+back to the kernel.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 3dc28210342f174270bcefac74ef5d0b52ffd846)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/vmx.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 8df195bbb41d..57d538fc7c75 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -9101,6 +9101,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+               __write_pkru(vcpu->arch.pkru);
+ 
+       atomic_switch_perf_msrs(vmx);
++
++      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++              add_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL,
++                      vcpu->arch.spec_ctrl, FEATURE_ENABLE_IBRS);
++
+       debugctlmsr = get_debugctlmsr();
+ 
+       vmx_arm_hv_timer(vcpu);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0286-x86-kvm-Pad-RSB-on-VM-transition.patch b/patches/kernel/0286-x86-kvm-Pad-RSB-on-VM-transition.patch

new file mode 100644 (file)

index 0000000..f337f15
--- /dev/null
+++ b/patches/kernel/0286-x86-kvm-Pad-RSB-on-VM-transition.patch
@@ -0,0 +1,154 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Fri, 20 Oct 2017 17:05:54 -0700
+Subject: [PATCH] x86/kvm: Pad RSB on VM transition
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Add code to pad the local CPU's RSB entries to protect
+against a previous, less privileged mode.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 5369368d3520addb2ffb2413cfa7e8f3efe2e31d)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/kvm_host.h | 103 ++++++++++++++++++++++++++++++++++++++++
+ arch/x86/kvm/vmx.c              |   2 +
+ 2 files changed, 105 insertions(+)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 1953c0a5b972..4117a97228a2 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -125,6 +125,109 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+ 
+ #define ASYNC_PF_PER_VCPU 64
+ 
++static inline void stuff_RSB(void)
++{
++        __asm__ __volatile__("  \n\
++      call .label1    \n\
++      pause     \n\
++.label1:        \n\
++      call .label2    \n\
++      pause     \n\
++.label2:        \n\
++      call .label3    \n\
++      pause     \n\
++.label3:        \n\
++      call .label4    \n\
++      pause     \n\
++.label4:        \n\
++      call .label5    \n\
++      pause     \n\
++.label5:        \n\
++      call .label6    \n\
++      pause     \n\
++.label6:        \n\
++      call .label7    \n\
++      pause     \n\
++.label7:        \n\
++      call .label8    \n\
++      pause     \n\
++.label8:        \n\
++      call .label9    \n\
++      pause     \n\
++.label9:        \n\
++      call .label10   \n\
++      pause     \n\
++.label10:       \n\
++      call .label11   \n\
++      pause     \n\
++.label11:       \n\
++      call .label12   \n\
++      pause     \n\
++.label12:       \n\
++      call .label13   \n\
++      pause     \n\
++.label13:       \n\
++      call .label14   \n\
++      pause     \n\
++.label14:       \n\
++      call .label15   \n\
++      pause     \n\
++.label15:       \n\
++      call .label16   \n\
++      pause     \n\
++.label16:     \n\
++      call .label17   \n\
++      pause   \n\
++.label17:     \n\
++      call .label18   \n\
++      pause   \n\
++.label18:     \n\
++      call .label19   \n\
++      pause   \n\
++.label19:     \n\
++      call .label20   \n\
++      pause   \n\
++.label20:     \n\
++      call .label21   \n\
++      pause   \n\
++.label21:     \n\
++      call .label22   \n\
++      pause   \n\
++.label22:     \n\
++      call .label23   \n\
++      pause   \n\
++.label23:     \n\
++      call .label24   \n\
++      pause   \n\
++.label24:     \n\
++      call .label25   \n\
++      pause   \n\
++.label25:     \n\
++      call .label26   \n\
++      pause   \n\
++.label26:     \n\
++      call .label27   \n\
++      pause   \n\
++.label27:     \n\
++      call .label28   \n\
++      pause   \n\
++.label28:     \n\
++      call .label29   \n\
++      pause   \n\
++.label29:     \n\
++      call .label30   \n\
++      pause   \n\
++.label30:     \n\
++      call .label31   \n\
++      pause   \n\
++.label31:     \n\
++      call .label32   \n\
++      pause   \n\
++.label32: \n\
++      add $(32*8), %%rsp      \n\
++": : :"memory");
++}
++
+ enum kvm_reg {
+       VCPU_REGS_RAX = 0,
+       VCPU_REGS_RCX = 1,
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 57d538fc7c75..496884b6467f 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -9228,6 +9228,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ #endif
+             );
+ 
++      stuff_RSB();
++
+       /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
+       if (debugctlmsr)
+               update_debugctlmsr(debugctlmsr);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0286-x86-spec_ctrl-Add-sysctl-knobs-to-enable-disable-SPE.patch b/patches/kernel/0286-x86-spec_ctrl-Add-sysctl-knobs-to-enable-disable-SPE.patch

deleted file mode 100644 (file)

index ef9bf3f..0000000
--- a/patches/kernel/0286-x86-spec_ctrl-Add-sysctl-knobs-to-enable-disable-SPE.patch
+++ /dev/null
@@ -1,613 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim@otc-grantley-02.jf.intel.com>
-Date: Thu, 16 Nov 2017 04:47:48 -0800
-Subject: [PATCH] x86/spec_ctrl: Add sysctl knobs to enable/disable SPEC_CTRL
- feature
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-There are 2 ways to control IBPB and IBRS
-
-1. At boot time
-       noibrs kernel boot parameter will disable IBRS usage
-       noibpb kernel boot parameter will disable IBPB usage
-Otherwise if the above parameters are not specified, the system
-will enable ibrs and ibpb usage if the cpu supports it.
-
-2. At run time
-       echo 0 > /proc/sys/kernel/ibrs_enabled will turn off IBRS
-       echo 1 > /proc/sys/kernel/ibrs_enabled will turn on IBRS in kernel
-       echo 2 > /proc/sys/kernel/ibrs_enabled will turn on IBRS in both userspace and kernel
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-[marcelo.cerri@canonical.com: add x86 guards to kernel/smp.c]
-[marcelo.cerri@canonical.com: include asm/msr.h under x86 guard in kernel/sysctl.c]
-Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
-(cherry picked from commit 23225db7b02c7f8b94e5d5050987430089e6f7cc)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- Documentation/admin-guide/kernel-parameters.txt |  10 ++
- arch/x86/include/asm/mwait.h                    |   4 +-
- arch/x86/include/asm/spec_ctrl.h                |  24 ++++-
- include/linux/smp.h                             |  87 +++++++++++++++++
- arch/x86/kernel/cpu/intel.c                     |  11 ++-
- arch/x86/kernel/cpu/microcode/core.c            |  11 +++
- arch/x86/kernel/process.c                       |   6 +-
- arch/x86/kernel/smpboot.c                       |   4 +-
- arch/x86/kvm/vmx.c                              |   4 +-
- arch/x86/lib/delay.c                            |   6 +-
- arch/x86/mm/tlb.c                               |   2 +-
- kernel/smp.c                                    |  41 ++++++++
- kernel/sysctl.c                                 | 125 ++++++++++++++++++++++++
- 13 files changed, 316 insertions(+), 19 deletions(-)
-
-diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
-index 1a6ebc6cdf26..e7216bc05b3b 100644
---- a/Documentation/admin-guide/kernel-parameters.txt
-+++ b/Documentation/admin-guide/kernel-parameters.txt
-@@ -2566,6 +2566,16 @@
-                       noexec=on: enable non-executable mappings (default)
-                       noexec=off: disable non-executable mappings
- 
-+      noibrs          [X86]
-+                      Don't use indirect branch restricted speculation (IBRS)
-+                      feature when running in secure environment,
-+                      to avoid performance overhead.
-+
-+      noibpb          [X86]
-+                      Don't use indirect branch prediction barrier (IBPB)
-+                      feature when running in secure environment,
-+                      to avoid performance overhead.
-+
-       nosmap          [X86]
-                       Disable SMAP (Supervisor Mode Access Prevention)
-                       even if it is supported by processor.
-diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
-index f15120ada161..d665daab3f84 100644
---- a/arch/x86/include/asm/mwait.h
-+++ b/arch/x86/include/asm/mwait.h
-@@ -107,14 +107,14 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
-                       mb();
-               }
- 
--              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+              if (ibrs_inuse)
-                       native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
- 
-               __monitor((void *)&current_thread_info()->flags, 0, 0);
-               if (!need_resched())
-                       __mwait(eax, ecx);
- 
--              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+              if (ibrs_inuse)
-                       native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
-       }
-       current_clr_polling();
-diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
-index 55ee1f36bda2..4c69e51261cc 100644
---- a/arch/x86/include/asm/spec_ctrl.h
-+++ b/arch/x86/include/asm/spec_ctrl.h
-@@ -8,6 +8,9 @@
- 
- #ifdef __ASSEMBLY__
- 
-+.extern use_ibrs
-+.extern use_ibpb
-+
- #define __ASM_ENABLE_IBRS                     \
-       pushq %rax;                             \
-       pushq %rcx;                             \
-@@ -104,15 +107,30 @@
-       add $(32*8), %rsp;
- 
- .macro ENABLE_IBRS
--ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS), X86_FEATURE_SPEC_CTRL
-+      testl   $1, use_ibrs
-+      jz      10f
-+      __ASM_ENABLE_IBRS
-+      jmp 20f
-+10:
-+      lfence
-+20:
- .endm
- 
- .macro ENABLE_IBRS_CLOBBER
--ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS_CLOBBER), X86_FEATURE_SPEC_CTRL
-+      testl   $1, use_ibrs
-+      jz      11f
-+      __ASM_ENABLE_IBRS_CLOBBER
-+      jmp 21f
-+11:
-+      lfence
-+21:
- .endm
- 
- .macro DISABLE_IBRS
--ALTERNATIVE "", __stringify(__ASM_DISABLE_IBRS), X86_FEATURE_SPEC_CTRL
-+      testl   $1, use_ibrs
-+      jz      9f
-+      __ASM_DISABLE_IBRS
-+9:
- .endm
- 
- .macro STUFF_RSB
-diff --git a/include/linux/smp.h b/include/linux/smp.h
-index 68123c1fe549..e2935c0a1bb4 100644
---- a/include/linux/smp.h
-+++ b/include/linux/smp.h
-@@ -50,6 +50,93 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
- 
- int smp_call_function_single_async(int cpu, struct call_single_data *csd);
- 
-+#ifdef CONFIG_X86
-+/* indicate usage of IBRS to control execution speculation */
-+extern int use_ibrs;
-+extern u32 sysctl_ibrs_enabled;
-+extern struct mutex spec_ctrl_mutex;
-+#define ibrs_supported                (use_ibrs & 0x2)
-+#define ibrs_disabled         (use_ibrs & 0x4)
-+static inline void set_ibrs_inuse(void)
-+{
-+      if (ibrs_supported)
-+              use_ibrs |= 0x1;
-+}
-+static inline void clear_ibrs_inuse(void)
-+{
-+      use_ibrs &= ~0x1;
-+}
-+static inline int check_ibrs_inuse(void)
-+{
-+      if (use_ibrs & 0x1)
-+              return 1;
-+      else
-+              /* rmb to prevent wrong speculation for security */
-+              rmb();
-+      return 0;
-+}
-+static inline void set_ibrs_supported(void)
-+{
-+      use_ibrs |= 0x2;
-+      if (!ibrs_disabled)
-+              set_ibrs_inuse();
-+}
-+static inline void set_ibrs_disabled(void)
-+{
-+      use_ibrs |= 0x4;
-+      if (check_ibrs_inuse())
-+              clear_ibrs_inuse();
-+}
-+static inline void clear_ibrs_disabled(void)
-+{
-+      use_ibrs &= ~0x4;
-+      set_ibrs_inuse();
-+}
-+#define ibrs_inuse            (check_ibrs_inuse())
-+
-+/* indicate usage of IBPB to control execution speculation */
-+extern int use_ibpb;
-+extern u32 sysctl_ibpb_enabled;
-+#define ibpb_supported                (use_ibpb & 0x2)
-+#define ibpb_disabled         (use_ibpb & 0x4)
-+static inline void set_ibpb_inuse(void)
-+{
-+      if (ibpb_supported)
-+              use_ibpb |= 0x1;
-+}
-+static inline void clear_ibpb_inuse(void)
-+{
-+      use_ibpb &= ~0x1;
-+}
-+static inline int check_ibpb_inuse(void)
-+{
-+      if (use_ibpb & 0x1)
-+              return 1;
-+      else
-+              /* rmb to prevent wrong speculation for security */
-+              rmb();
-+      return 0;
-+}
-+static inline void set_ibpb_supported(void)
-+{
-+      use_ibpb |= 0x2;
-+      if (!ibpb_disabled)
-+              set_ibpb_inuse();
-+}
-+static inline void set_ibpb_disabled(void)
-+{
-+      use_ibpb |= 0x4;
-+      if (check_ibpb_inuse())
-+              clear_ibpb_inuse();
-+}
-+static inline void clear_ibpb_disabled(void)
-+{
-+      use_ibpb &= ~0x4;
-+      set_ibpb_inuse();
-+}
-+#define ibpb_inuse            (check_ibpb_inuse())
-+#endif
-+
- #ifdef CONFIG_SMP
- 
- #include <linux/preempt.h>
-diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
-index f1d94c73625a..c69ea2efbed1 100644
---- a/arch/x86/kernel/cpu/intel.c
-+++ b/arch/x86/kernel/cpu/intel.c
-@@ -628,10 +628,17 @@ static void init_intel(struct cpuinfo_x86 *c)
- 
-       init_intel_misc_features(c);
- 
--      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
-                 printk_once(KERN_INFO "FEATURE SPEC_CTRL Present\n");
--        else
-+              set_ibrs_supported();
-+              set_ibpb_supported();
-+              if (ibrs_inuse)
-+                      sysctl_ibrs_enabled = 1;
-+              if (ibpb_inuse)
-+                      sysctl_ibpb_enabled = 1;
-+        } else {
-                 printk_once(KERN_INFO "FEATURE SPEC_CTRL Not Present\n");
-+      }
- }
- 
- #ifdef CONFIG_X86_32
-diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
-index c4fa4a85d4cb..6450aeda72fc 100644
---- a/arch/x86/kernel/cpu/microcode/core.c
-+++ b/arch/x86/kernel/cpu/microcode/core.c
-@@ -535,6 +535,17 @@ static ssize_t reload_store(struct device *dev,
-       }
-       if (!ret)
-               perf_check_microcode();
-+
-+      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
-+              printk_once(KERN_INFO "FEATURE SPEC_CTRL Present\n");
-+              set_ibrs_supported();
-+              set_ibpb_supported();
-+              if (ibrs_inuse)
-+                      sysctl_ibrs_enabled = 1;
-+              if (ibpb_inuse)
-+                      sysctl_ibpb_enabled = 1;
-+      }
-+
-       mutex_unlock(&microcode_mutex);
-       put_online_cpus();
- 
-diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
-index 3adb3806a284..3fdf5358998e 100644
---- a/arch/x86/kernel/process.c
-+++ b/arch/x86/kernel/process.c
-@@ -447,16 +447,16 @@ static __cpuidle void mwait_idle(void)
-                       mb(); /* quirk */
-               }
- 
--              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+              if (ibrs_inuse)
-                         native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
- 
-               __monitor((void *)&current_thread_info()->flags, 0, 0);
-               if (!need_resched()) {
-                       __sti_mwait(0, 0);
--                      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+                      if (ibrs_inuse)
-                               native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
-               } else {
--                      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+                      if (ibrs_inuse)
-                               native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
-                       local_irq_enable();
-               }
-diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
-index a652bff7add4..9317aa4a7446 100644
---- a/arch/x86/kernel/smpboot.c
-+++ b/arch/x86/kernel/smpboot.c
-@@ -1693,14 +1693,14 @@ void native_play_dead(void)
-       play_dead_common();
-       tboot_shutdown(TB_SHUTDOWN_WFS);
- 
--      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+      if (ibrs_inuse)
-               native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
- 
-       mwait_play_dead();      /* Only returns on failure */
-       if (cpuidle_play_dead())
-               hlt_play_dead();
- 
--      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+      if (ibrs_inuse)
-               native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
- }
- 
-diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
-index 496884b6467f..d2168203bddc 100644
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -2269,7 +2269,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-       if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
-               per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
-               vmcs_load(vmx->loaded_vmcs->vmcs);
--              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+              if (ibpb_inuse)
-                       native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
-       }
- 
-@@ -9102,7 +9102,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
- 
-       atomic_switch_perf_msrs(vmx);
- 
--      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+      if (ibrs_inuse)
-               add_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL,
-                       vcpu->arch.spec_ctrl, FEATURE_ENABLE_IBRS);
- 
-diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
-index b088463973e4..72a174642550 100644
---- a/arch/x86/lib/delay.c
-+++ b/arch/x86/lib/delay.c
-@@ -107,8 +107,7 @@ static void delay_mwaitx(unsigned long __loops)
-       for (;;) {
-               delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
- 
--              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
--                      (delay > IBRS_DISABLE_THRESHOLD))
-+              if (ibrs_inuse && (delay > IBRS_DISABLE_THRESHOLD))
-                       native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
- 
-               /*
-@@ -124,8 +123,7 @@ static void delay_mwaitx(unsigned long __loops)
-                */
-               __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
- 
--              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
--                      (delay > IBRS_DISABLE_THRESHOLD))
-+              if (ibrs_inuse && (delay > IBRS_DISABLE_THRESHOLD))
-                       native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
- 
-               end = rdtsc_ordered();
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index 301e6efbc514..6365f769de3d 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -221,7 +221,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
-               bool need_flush;
- 
-               /* Null tsk means switching to kernel, so that's safe */
--              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) && tsk &&
-+              if (ibpb_inuse && tsk &&
-                       ___ptrace_may_access(tsk, current, PTRACE_MODE_IBPB))
-                       native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
- 
-diff --git a/kernel/smp.c b/kernel/smp.c
-index 3061483cb3ad..3bece045f4a4 100644
---- a/kernel/smp.c
-+++ b/kernel/smp.c
-@@ -498,6 +498,26 @@ EXPORT_SYMBOL(smp_call_function);
- unsigned int setup_max_cpus = NR_CPUS;
- EXPORT_SYMBOL(setup_max_cpus);
- 
-+#ifdef CONFIG_X86
-+/*
-+ * use IBRS
-+ * bit 0 = indicate if ibrs is currently in use
-+ * bit 1 = indicate if system supports ibrs
-+ * bit 2 = indicate if admin disables ibrs
-+*/
-+
-+int use_ibrs;
-+EXPORT_SYMBOL(use_ibrs);
-+
-+/*
-+ * use IBRS
-+ * bit 0 = indicate if ibpb is currently in use
-+ * bit 1 = indicate if system supports ibpb
-+ * bit 2 = indicate if admin disables ibpb
-+*/
-+int use_ibpb;
-+EXPORT_SYMBOL(use_ibpb);
-+#endif
- 
- /*
-  * Setup routine for controlling SMP activation
-@@ -522,6 +542,27 @@ static int __init nosmp(char *str)
- 
- early_param("nosmp", nosmp);
- 
-+#ifdef CONFIG_X86
-+static int __init noibrs(char *str)
-+{
-+      set_ibrs_disabled();
-+
-+      return 0;
-+}
-+
-+early_param("noibrs", noibrs);
-+
-+static int __init noibpb(char *str)
-+{
-+      set_ibpb_disabled();
-+
-+      return 0;
-+}
-+
-+early_param("noibpb", noibpb);
-+#endif
-+
-+
- /* this is hard limit */
- static int __init nrcpus(char *str)
- {
-diff --git a/kernel/sysctl.c b/kernel/sysctl.c
-index 7ab08d5728e6..69c37bd6251a 100644
---- a/kernel/sysctl.c
-+++ b/kernel/sysctl.c
-@@ -72,6 +72,7 @@
- #include <asm/processor.h>
- 
- #ifdef CONFIG_X86
-+#include <asm/msr.h>
- #include <asm/nmi.h>
- #include <asm/stacktrace.h>
- #include <asm/io.h>
-@@ -222,6 +223,15 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
-               void __user *buffer, size_t *lenp, loff_t *ppos);
- #endif
- 
-+#ifdef CONFIG_X86
-+int proc_dointvec_ibrs_ctrl(struct ctl_table *table, int write,
-+                 void __user *buffer, size_t *lenp, loff_t *ppos);
-+int proc_dointvec_ibpb_ctrl(struct ctl_table *table, int write,
-+                 void __user *buffer, size_t *lenp, loff_t *ppos);
-+int proc_dointvec_ibrs_dump(struct ctl_table *table, int write,
-+                 void __user *buffer, size_t *lenp, loff_t *ppos);
-+#endif
-+
- #ifdef CONFIG_MAGIC_SYSRQ
- /* Note: sysrq code uses it's own private copy */
- static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
-@@ -258,6 +268,12 @@ extern struct ctl_table epoll_table[];
- int sysctl_legacy_va_layout;
- #endif
- 
-+u32 sysctl_ibrs_dump = 0;
-+u32 sysctl_ibrs_enabled = 0;
-+EXPORT_SYMBOL(sysctl_ibrs_enabled);
-+u32 sysctl_ibpb_enabled = 0;
-+EXPORT_SYMBOL(sysctl_ibpb_enabled);
-+
- /* The default sysctl tables: */
- 
- static struct ctl_table sysctl_base_table[] = {
-@@ -1241,6 +1257,35 @@ static struct ctl_table kern_table[] = {
-               .extra1         = &zero,
-               .extra2         = &one,
-       },
-+#endif
-+#ifdef CONFIG_X86
-+      {
-+              .procname       = "ibrs_enabled",
-+              .data           = &sysctl_ibrs_enabled,
-+              .maxlen         = sizeof(unsigned int),
-+              .mode           = 0644,
-+              .proc_handler   = proc_dointvec_ibrs_ctrl,
-+              .extra1         = &zero,
-+              .extra2         = &two,
-+      },
-+      {
-+              .procname       = "ibpb_enabled",
-+              .data           = &sysctl_ibpb_enabled,
-+              .maxlen         = sizeof(unsigned int),
-+              .mode           = 0644,
-+              .proc_handler   = proc_dointvec_ibpb_ctrl,
-+              .extra1         = &zero,
-+              .extra2         = &one,
-+      },
-+      {
-+              .procname       = "ibrs_dump",
-+              .data           = &sysctl_ibrs_dump,
-+              .maxlen         = sizeof(unsigned int),
-+              .mode           = 0644,
-+              .proc_handler   = proc_dointvec_ibrs_dump,
-+              .extra1         = &zero,
-+              .extra2         = &one,
-+      },
- #endif
-       { }
- };
-@@ -2585,6 +2630,86 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
-                               do_proc_dointvec_minmax_conv, &param);
- }
- 
-+#ifdef CONFIG_X86
-+int proc_dointvec_ibrs_dump(struct ctl_table *table, int write,
-+      void __user *buffer, size_t *lenp, loff_t *ppos)
-+{
-+      int ret;
-+      unsigned int cpu;
-+
-+      ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-+      printk("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
-+      printk("use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
-+      for_each_online_cpu(cpu) {
-+             u64 val;
-+
-+             if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
-+                     rdmsrl_on_cpu(cpu, MSR_IA32_SPEC_CTRL, &val);
-+             else
-+                     val = 0;
-+             printk("read cpu %d ibrs val %lu\n", cpu, (unsigned long) val);
-+      }
-+      return ret;
-+}
-+
-+int proc_dointvec_ibrs_ctrl(struct ctl_table *table, int write,
-+      void __user *buffer, size_t *lenp, loff_t *ppos)
-+{
-+      int ret;
-+      unsigned int cpu;
-+
-+      ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-+      pr_debug("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
-+      pr_debug("before:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
-+      if (sysctl_ibrs_enabled == 0) {
-+              /* always set IBRS off */
-+              set_ibrs_disabled();
-+              if (ibrs_supported) {
-+                      for_each_online_cpu(cpu)
-+                              wrmsrl_on_cpu(cpu, MSR_IA32_SPEC_CTRL, 0x0);
-+              }
-+      } else if (sysctl_ibrs_enabled == 2) {
-+              /* always set IBRS on, even in user space */
-+              clear_ibrs_disabled();
-+              if (ibrs_supported) {
-+                      for_each_online_cpu(cpu)
-+                              wrmsrl_on_cpu(cpu, MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
-+              } else {
-+                      sysctl_ibrs_enabled = 0;
-+              }
-+      } else if (sysctl_ibrs_enabled == 1) {
-+              /* use IBRS in kernel */
-+              clear_ibrs_disabled();
-+              if (!ibrs_inuse)
-+                      /* platform don't support ibrs */
-+                      sysctl_ibrs_enabled = 0;
-+      }
-+      pr_debug("after:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
-+      return ret;
-+}
-+
-+int proc_dointvec_ibpb_ctrl(struct ctl_table *table, int write,
-+      void __user *buffer, size_t *lenp, loff_t *ppos)
-+{
-+      int ret;
-+
-+      ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-+      pr_debug("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
-+      pr_debug("before:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
-+      if (sysctl_ibpb_enabled == 0)
-+              set_ibpb_disabled();
-+      else if (sysctl_ibpb_enabled == 1) {
-+              clear_ibpb_disabled();
-+              if (!ibpb_inuse)
-+                      /* platform don't support ibpb */
-+                      sysctl_ibpb_enabled = 0;
-+      }
-+      pr_debug("after:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
-+      return ret;
-+}
-+#endif
-+
-+
- struct do_proc_douintvec_minmax_conv_param {
-       unsigned int *min;
-       unsigned int *max;
--- 
-2.14.2
-
diff --git a/patches/kernel/0287-x86-spec_ctrl-Add-lock-to-serialize-changes-to-ibrs-.patch b/patches/kernel/0287-x86-spec_ctrl-Add-lock-to-serialize-changes-to-ibrs-.patch

deleted file mode 100644 (file)

index ac1928c..0000000
--- a/patches/kernel/0287-x86-spec_ctrl-Add-lock-to-serialize-changes-to-ibrs-.patch
+++ /dev/null
@@ -1,166 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Mon, 20 Nov 2017 13:47:54 -0800
-Subject: [PATCH] x86/spec_ctrl: Add lock to serialize changes to ibrs and ibpb
- control
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 85789933bc45a3e763823675bd0d80e3e617f234)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/cpu/intel.c          | 22 ++++++++++++----------
- arch/x86/kernel/cpu/microcode/core.c |  2 ++
- kernel/smp.c                         |  4 ++++
- kernel/sysctl.c                      | 14 +++++++++++++-
- 4 files changed, 31 insertions(+), 11 deletions(-)
-
-diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
-index c69ea2efbed1..8d558e24783c 100644
---- a/arch/x86/kernel/cpu/intel.c
-+++ b/arch/x86/kernel/cpu/intel.c
-@@ -628,16 +628,18 @@ static void init_intel(struct cpuinfo_x86 *c)
- 
-       init_intel_misc_features(c);
- 
--      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
--                printk_once(KERN_INFO "FEATURE SPEC_CTRL Present\n");
--              set_ibrs_supported();
--              set_ibpb_supported();
--              if (ibrs_inuse)
--                      sysctl_ibrs_enabled = 1;
--              if (ibpb_inuse)
--                      sysctl_ibpb_enabled = 1;
--        } else {
--                printk_once(KERN_INFO "FEATURE SPEC_CTRL Not Present\n");
-+      if (!c->cpu_index) {
-+              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
-+                      printk(KERN_INFO "FEATURE SPEC_CTRL Present\n");
-+                      set_ibrs_supported();
-+                      set_ibpb_supported();
-+                      if (ibrs_inuse)
-+                              sysctl_ibrs_enabled = 1;
-+                      if (ibpb_inuse)
-+                              sysctl_ibpb_enabled = 1;
-+              } else {
-+                      printk(KERN_INFO "FEATURE SPEC_CTRL Not Present\n");
-+              }
-       }
- }
- 
-diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
-index 6450aeda72fc..55086921d29e 100644
---- a/arch/x86/kernel/cpu/microcode/core.c
-+++ b/arch/x86/kernel/cpu/microcode/core.c
-@@ -538,12 +538,14 @@ static ssize_t reload_store(struct device *dev,
- 
-       if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
-               printk_once(KERN_INFO "FEATURE SPEC_CTRL Present\n");
-+              mutex_lock(&spec_ctrl_mutex);
-               set_ibrs_supported();
-               set_ibpb_supported();
-               if (ibrs_inuse)
-                       sysctl_ibrs_enabled = 1;
-               if (ibpb_inuse)
-                       sysctl_ibpb_enabled = 1;
-+              mutex_unlock(&spec_ctrl_mutex);
-       }
- 
-       mutex_unlock(&microcode_mutex);
-diff --git a/kernel/smp.c b/kernel/smp.c
-index 3bece045f4a4..a224ec0c540c 100644
---- a/kernel/smp.c
-+++ b/kernel/smp.c
-@@ -519,6 +519,10 @@ int use_ibpb;
- EXPORT_SYMBOL(use_ibpb);
- #endif
- 
-+/* mutex to serialize IBRS & IBPB control changes */
-+DEFINE_MUTEX(spec_ctrl_mutex);
-+EXPORT_SYMBOL(spec_ctrl_mutex);
-+
- /*
-  * Setup routine for controlling SMP activation
-  *
-diff --git a/kernel/sysctl.c b/kernel/sysctl.c
-index 69c37bd6251a..47a37792109d 100644
---- a/kernel/sysctl.c
-+++ b/kernel/sysctl.c
-@@ -69,6 +69,7 @@
- #include <linux/mount.h>
- 
- #include <linux/uaccess.h>
-+#include <linux/mutex.h>
- #include <asm/processor.h>
- 
- #ifdef CONFIG_X86
-@@ -2634,12 +2635,17 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
- int proc_dointvec_ibrs_dump(struct ctl_table *table, int write,
-       void __user *buffer, size_t *lenp, loff_t *ppos)
- {
--      int ret;
-+      int ret, orig_inuse;
-       unsigned int cpu;
- 
-+
-       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-       printk("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
-       printk("use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
-+      mutex_lock(&spec_ctrl_mutex);
-+      orig_inuse = use_ibrs;
-+      /* temporary halt to ibrs usage to dump ibrs values */
-+      clear_ibrs_inuse();
-       for_each_online_cpu(cpu) {
-              u64 val;
- 
-@@ -2649,6 +2655,8 @@ int proc_dointvec_ibrs_dump(struct ctl_table *table, int write,
-                      val = 0;
-              printk("read cpu %d ibrs val %lu\n", cpu, (unsigned long) val);
-       }
-+      use_ibrs = orig_inuse;
-+      mutex_unlock(&spec_ctrl_mutex);
-       return ret;
- }
- 
-@@ -2661,6 +2669,7 @@ int proc_dointvec_ibrs_ctrl(struct ctl_table *table, int write,
-       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-       pr_debug("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
-       pr_debug("before:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
-+      mutex_lock(&spec_ctrl_mutex);
-       if (sysctl_ibrs_enabled == 0) {
-               /* always set IBRS off */
-               set_ibrs_disabled();
-@@ -2684,6 +2693,7 @@ int proc_dointvec_ibrs_ctrl(struct ctl_table *table, int write,
-                       /* platform don't support ibrs */
-                       sysctl_ibrs_enabled = 0;
-       }
-+      mutex_unlock(&spec_ctrl_mutex);
-       pr_debug("after:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
-       return ret;
- }
-@@ -2696,6 +2706,7 @@ int proc_dointvec_ibpb_ctrl(struct ctl_table *table, int write,
-       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-       pr_debug("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
-       pr_debug("before:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
-+      mutex_lock(&spec_ctrl_mutex);
-       if (sysctl_ibpb_enabled == 0)
-               set_ibpb_disabled();
-       else if (sysctl_ibpb_enabled == 1) {
-@@ -2704,6 +2715,7 @@ int proc_dointvec_ibpb_ctrl(struct ctl_table *table, int write,
-                       /* platform don't support ibpb */
-                       sysctl_ibpb_enabled = 0;
-       }
-+      mutex_unlock(&spec_ctrl_mutex);
-       pr_debug("after:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
-       return ret;
- }
--- 
-2.14.2
-
diff --git a/patches/kernel/0287-x86-spec_ctrl-Add-sysctl-knobs-to-enable-disable-SPE.patch b/patches/kernel/0287-x86-spec_ctrl-Add-sysctl-knobs-to-enable-disable-SPE.patch

new file mode 100644 (file)

index 0000000..ef9bf3f
--- /dev/null
+++ b/patches/kernel/0287-x86-spec_ctrl-Add-sysctl-knobs-to-enable-disable-SPE.patch
@@ -0,0 +1,613 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim@otc-grantley-02.jf.intel.com>
+Date: Thu, 16 Nov 2017 04:47:48 -0800
+Subject: [PATCH] x86/spec_ctrl: Add sysctl knobs to enable/disable SPEC_CTRL
+ feature
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+There are 2 ways to control IBPB and IBRS
+
+1. At boot time
+       noibrs kernel boot parameter will disable IBRS usage
+       noibpb kernel boot parameter will disable IBPB usage
+Otherwise if the above parameters are not specified, the system
+will enable ibrs and ibpb usage if the cpu supports it.
+
+2. At run time
+       echo 0 > /proc/sys/kernel/ibrs_enabled will turn off IBRS
+       echo 1 > /proc/sys/kernel/ibrs_enabled will turn on IBRS in kernel
+       echo 2 > /proc/sys/kernel/ibrs_enabled will turn on IBRS in both userspace and kernel
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+[marcelo.cerri@canonical.com: add x86 guards to kernel/smp.c]
+[marcelo.cerri@canonical.com: include asm/msr.h under x86 guard in kernel/sysctl.c]
+Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
+(cherry picked from commit 23225db7b02c7f8b94e5d5050987430089e6f7cc)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ Documentation/admin-guide/kernel-parameters.txt |  10 ++
+ arch/x86/include/asm/mwait.h                    |   4 +-
+ arch/x86/include/asm/spec_ctrl.h                |  24 ++++-
+ include/linux/smp.h                             |  87 +++++++++++++++++
+ arch/x86/kernel/cpu/intel.c                     |  11 ++-
+ arch/x86/kernel/cpu/microcode/core.c            |  11 +++
+ arch/x86/kernel/process.c                       |   6 +-
+ arch/x86/kernel/smpboot.c                       |   4 +-
+ arch/x86/kvm/vmx.c                              |   4 +-
+ arch/x86/lib/delay.c                            |   6 +-
+ arch/x86/mm/tlb.c                               |   2 +-
+ kernel/smp.c                                    |  41 ++++++++
+ kernel/sysctl.c                                 | 125 ++++++++++++++++++++++++
+ 13 files changed, 316 insertions(+), 19 deletions(-)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 1a6ebc6cdf26..e7216bc05b3b 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -2566,6 +2566,16 @@
+                       noexec=on: enable non-executable mappings (default)
+                       noexec=off: disable non-executable mappings
+ 
++      noibrs          [X86]
++                      Don't use indirect branch restricted speculation (IBRS)
++                      feature when running in secure environment,
++                      to avoid performance overhead.
++
++      noibpb          [X86]
++                      Don't use indirect branch prediction barrier (IBPB)
++                      feature when running in secure environment,
++                      to avoid performance overhead.
++
+       nosmap          [X86]
+                       Disable SMAP (Supervisor Mode Access Prevention)
+                       even if it is supported by processor.
+diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
+index f15120ada161..d665daab3f84 100644
+--- a/arch/x86/include/asm/mwait.h
++++ b/arch/x86/include/asm/mwait.h
+@@ -107,14 +107,14 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+                       mb();
+               }
+ 
+-              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++              if (ibrs_inuse)
+                       native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ 
+               __monitor((void *)&current_thread_info()->flags, 0, 0);
+               if (!need_resched())
+                       __mwait(eax, ecx);
+ 
+-              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++              if (ibrs_inuse)
+                       native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
+       }
+       current_clr_polling();
+diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
+index 55ee1f36bda2..4c69e51261cc 100644
+--- a/arch/x86/include/asm/spec_ctrl.h
++++ b/arch/x86/include/asm/spec_ctrl.h
+@@ -8,6 +8,9 @@
+ 
+ #ifdef __ASSEMBLY__
+ 
++.extern use_ibrs
++.extern use_ibpb
++
+ #define __ASM_ENABLE_IBRS                     \
+       pushq %rax;                             \
+       pushq %rcx;                             \
+@@ -104,15 +107,30 @@
+       add $(32*8), %rsp;
+ 
+ .macro ENABLE_IBRS
+-ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS), X86_FEATURE_SPEC_CTRL
++      testl   $1, use_ibrs
++      jz      10f
++      __ASM_ENABLE_IBRS
++      jmp 20f
++10:
++      lfence
++20:
+ .endm
+ 
+ .macro ENABLE_IBRS_CLOBBER
+-ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS_CLOBBER), X86_FEATURE_SPEC_CTRL
++      testl   $1, use_ibrs
++      jz      11f
++      __ASM_ENABLE_IBRS_CLOBBER
++      jmp 21f
++11:
++      lfence
++21:
+ .endm
+ 
+ .macro DISABLE_IBRS
+-ALTERNATIVE "", __stringify(__ASM_DISABLE_IBRS), X86_FEATURE_SPEC_CTRL
++      testl   $1, use_ibrs
++      jz      9f
++      __ASM_DISABLE_IBRS
++9:
+ .endm
+ 
+ .macro STUFF_RSB
+diff --git a/include/linux/smp.h b/include/linux/smp.h
+index 68123c1fe549..e2935c0a1bb4 100644
+--- a/include/linux/smp.h
++++ b/include/linux/smp.h
+@@ -50,6 +50,93 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
+ 
+ int smp_call_function_single_async(int cpu, struct call_single_data *csd);
+ 
++#ifdef CONFIG_X86
++/* indicate usage of IBRS to control execution speculation */
++extern int use_ibrs;
++extern u32 sysctl_ibrs_enabled;
++extern struct mutex spec_ctrl_mutex;
++#define ibrs_supported                (use_ibrs & 0x2)
++#define ibrs_disabled         (use_ibrs & 0x4)
++static inline void set_ibrs_inuse(void)
++{
++      if (ibrs_supported)
++              use_ibrs |= 0x1;
++}
++static inline void clear_ibrs_inuse(void)
++{
++      use_ibrs &= ~0x1;
++}
++static inline int check_ibrs_inuse(void)
++{
++      if (use_ibrs & 0x1)
++              return 1;
++      else
++              /* rmb to prevent wrong speculation for security */
++              rmb();
++      return 0;
++}
++static inline void set_ibrs_supported(void)
++{
++      use_ibrs |= 0x2;
++      if (!ibrs_disabled)
++              set_ibrs_inuse();
++}
++static inline void set_ibrs_disabled(void)
++{
++      use_ibrs |= 0x4;
++      if (check_ibrs_inuse())
++              clear_ibrs_inuse();
++}
++static inline void clear_ibrs_disabled(void)
++{
++      use_ibrs &= ~0x4;
++      set_ibrs_inuse();
++}
++#define ibrs_inuse            (check_ibrs_inuse())
++
++/* indicate usage of IBPB to control execution speculation */
++extern int use_ibpb;
++extern u32 sysctl_ibpb_enabled;
++#define ibpb_supported                (use_ibpb & 0x2)
++#define ibpb_disabled         (use_ibpb & 0x4)
++static inline void set_ibpb_inuse(void)
++{
++      if (ibpb_supported)
++              use_ibpb |= 0x1;
++}
++static inline void clear_ibpb_inuse(void)
++{
++      use_ibpb &= ~0x1;
++}
++static inline int check_ibpb_inuse(void)
++{
++      if (use_ibpb & 0x1)
++              return 1;
++      else
++              /* rmb to prevent wrong speculation for security */
++              rmb();
++      return 0;
++}
++static inline void set_ibpb_supported(void)
++{
++      use_ibpb |= 0x2;
++      if (!ibpb_disabled)
++              set_ibpb_inuse();
++}
++static inline void set_ibpb_disabled(void)
++{
++      use_ibpb |= 0x4;
++      if (check_ibpb_inuse())
++              clear_ibpb_inuse();
++}
++static inline void clear_ibpb_disabled(void)
++{
++      use_ibpb &= ~0x4;
++      set_ibpb_inuse();
++}
++#define ibpb_inuse            (check_ibpb_inuse())
++#endif
++
+ #ifdef CONFIG_SMP
+ 
+ #include <linux/preempt.h>
+diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
+index f1d94c73625a..c69ea2efbed1 100644
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -628,10 +628,17 @@ static void init_intel(struct cpuinfo_x86 *c)
+ 
+       init_intel_misc_features(c);
+ 
+-      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
+                 printk_once(KERN_INFO "FEATURE SPEC_CTRL Present\n");
+-        else
++              set_ibrs_supported();
++              set_ibpb_supported();
++              if (ibrs_inuse)
++                      sysctl_ibrs_enabled = 1;
++              if (ibpb_inuse)
++                      sysctl_ibpb_enabled = 1;
++        } else {
+                 printk_once(KERN_INFO "FEATURE SPEC_CTRL Not Present\n");
++      }
+ }
+ 
+ #ifdef CONFIG_X86_32
+diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
+index c4fa4a85d4cb..6450aeda72fc 100644
+--- a/arch/x86/kernel/cpu/microcode/core.c
++++ b/arch/x86/kernel/cpu/microcode/core.c
+@@ -535,6 +535,17 @@ static ssize_t reload_store(struct device *dev,
+       }
+       if (!ret)
+               perf_check_microcode();
++
++      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
++              printk_once(KERN_INFO "FEATURE SPEC_CTRL Present\n");
++              set_ibrs_supported();
++              set_ibpb_supported();
++              if (ibrs_inuse)
++                      sysctl_ibrs_enabled = 1;
++              if (ibpb_inuse)
++                      sysctl_ibpb_enabled = 1;
++      }
++
+       mutex_unlock(&microcode_mutex);
+       put_online_cpus();
+ 
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
+index 3adb3806a284..3fdf5358998e 100644
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -447,16 +447,16 @@ static __cpuidle void mwait_idle(void)
+                       mb(); /* quirk */
+               }
+ 
+-              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++              if (ibrs_inuse)
+                         native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ 
+               __monitor((void *)&current_thread_info()->flags, 0, 0);
+               if (!need_resched()) {
+                       __sti_mwait(0, 0);
+-                      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++                      if (ibrs_inuse)
+                               native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
+               } else {
+-                      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++                      if (ibrs_inuse)
+                               native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
+                       local_irq_enable();
+               }
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index a652bff7add4..9317aa4a7446 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -1693,14 +1693,14 @@ void native_play_dead(void)
+       play_dead_common();
+       tboot_shutdown(TB_SHUTDOWN_WFS);
+ 
+-      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++      if (ibrs_inuse)
+               native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ 
+       mwait_play_dead();      /* Only returns on failure */
+       if (cpuidle_play_dead())
+               hlt_play_dead();
+ 
+-      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++      if (ibrs_inuse)
+               native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
+ }
+ 
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 496884b6467f..d2168203bddc 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2269,7 +2269,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+       if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
+               per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
+               vmcs_load(vmx->loaded_vmcs->vmcs);
+-              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++              if (ibpb_inuse)
+                       native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+       }
+ 
+@@ -9102,7 +9102,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ 
+       atomic_switch_perf_msrs(vmx);
+ 
+-      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++      if (ibrs_inuse)
+               add_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL,
+                       vcpu->arch.spec_ctrl, FEATURE_ENABLE_IBRS);
+ 
+diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
+index b088463973e4..72a174642550 100644
+--- a/arch/x86/lib/delay.c
++++ b/arch/x86/lib/delay.c
+@@ -107,8 +107,7 @@ static void delay_mwaitx(unsigned long __loops)
+       for (;;) {
+               delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
+ 
+-              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
+-                      (delay > IBRS_DISABLE_THRESHOLD))
++              if (ibrs_inuse && (delay > IBRS_DISABLE_THRESHOLD))
+                       native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ 
+               /*
+@@ -124,8 +123,7 @@ static void delay_mwaitx(unsigned long __loops)
+                */
+               __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
+ 
+-              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
+-                      (delay > IBRS_DISABLE_THRESHOLD))
++              if (ibrs_inuse && (delay > IBRS_DISABLE_THRESHOLD))
+                       native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
+ 
+               end = rdtsc_ordered();
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 301e6efbc514..6365f769de3d 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -221,7 +221,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+               bool need_flush;
+ 
+               /* Null tsk means switching to kernel, so that's safe */
+-              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) && tsk &&
++              if (ibpb_inuse && tsk &&
+                       ___ptrace_may_access(tsk, current, PTRACE_MODE_IBPB))
+                       native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+ 
+diff --git a/kernel/smp.c b/kernel/smp.c
+index 3061483cb3ad..3bece045f4a4 100644
+--- a/kernel/smp.c
++++ b/kernel/smp.c
+@@ -498,6 +498,26 @@ EXPORT_SYMBOL(smp_call_function);
+ unsigned int setup_max_cpus = NR_CPUS;
+ EXPORT_SYMBOL(setup_max_cpus);
+ 
++#ifdef CONFIG_X86
++/*
++ * use IBRS
++ * bit 0 = indicate if ibrs is currently in use
++ * bit 1 = indicate if system supports ibrs
++ * bit 2 = indicate if admin disables ibrs
++*/
++
++int use_ibrs;
++EXPORT_SYMBOL(use_ibrs);
++
++/*
++ * use IBPB
++ * bit 0 = indicate if ibpb is currently in use
++ * bit 1 = indicate if system supports ibpb
++ * bit 2 = indicate if admin disables ibpb
++*/
++int use_ibpb;
++EXPORT_SYMBOL(use_ibpb);
++#endif
+ 
+ /*
+  * Setup routine for controlling SMP activation
+@@ -522,6 +542,27 @@ static int __init nosmp(char *str)
+ 
+ early_param("nosmp", nosmp);
+ 
++#ifdef CONFIG_X86
++static int __init noibrs(char *str)
++{
++      set_ibrs_disabled();
++
++      return 0;
++}
++
++early_param("noibrs", noibrs);
++
++static int __init noibpb(char *str)
++{
++      set_ibpb_disabled();
++
++      return 0;
++}
++
++early_param("noibpb", noibpb);
++#endif
++
++
+ /* this is hard limit */
+ static int __init nrcpus(char *str)
+ {
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c
+index 7ab08d5728e6..69c37bd6251a 100644
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -72,6 +72,7 @@
+ #include <asm/processor.h>
+ 
+ #ifdef CONFIG_X86
++#include <asm/msr.h>
+ #include <asm/nmi.h>
+ #include <asm/stacktrace.h>
+ #include <asm/io.h>
+@@ -222,6 +223,15 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
+               void __user *buffer, size_t *lenp, loff_t *ppos);
+ #endif
+ 
++#ifdef CONFIG_X86
++int proc_dointvec_ibrs_ctrl(struct ctl_table *table, int write,
++                 void __user *buffer, size_t *lenp, loff_t *ppos);
++int proc_dointvec_ibpb_ctrl(struct ctl_table *table, int write,
++                 void __user *buffer, size_t *lenp, loff_t *ppos);
++int proc_dointvec_ibrs_dump(struct ctl_table *table, int write,
++                 void __user *buffer, size_t *lenp, loff_t *ppos);
++#endif
++
+ #ifdef CONFIG_MAGIC_SYSRQ
+ /* Note: sysrq code uses it's own private copy */
+ static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
+@@ -258,6 +268,12 @@ extern struct ctl_table epoll_table[];
+ int sysctl_legacy_va_layout;
+ #endif
+ 
++u32 sysctl_ibrs_dump = 0;
++u32 sysctl_ibrs_enabled = 0;
++EXPORT_SYMBOL(sysctl_ibrs_enabled);
++u32 sysctl_ibpb_enabled = 0;
++EXPORT_SYMBOL(sysctl_ibpb_enabled);
++
+ /* The default sysctl tables: */
+ 
+ static struct ctl_table sysctl_base_table[] = {
+@@ -1241,6 +1257,35 @@ static struct ctl_table kern_table[] = {
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
++#endif
++#ifdef CONFIG_X86
++      {
++              .procname       = "ibrs_enabled",
++              .data           = &sysctl_ibrs_enabled,
++              .maxlen         = sizeof(unsigned int),
++              .mode           = 0644,
++              .proc_handler   = proc_dointvec_ibrs_ctrl,
++              .extra1         = &zero,
++              .extra2         = &two,
++      },
++      {
++              .procname       = "ibpb_enabled",
++              .data           = &sysctl_ibpb_enabled,
++              .maxlen         = sizeof(unsigned int),
++              .mode           = 0644,
++              .proc_handler   = proc_dointvec_ibpb_ctrl,
++              .extra1         = &zero,
++              .extra2         = &one,
++      },
++      {
++              .procname       = "ibrs_dump",
++              .data           = &sysctl_ibrs_dump,
++              .maxlen         = sizeof(unsigned int),
++              .mode           = 0644,
++              .proc_handler   = proc_dointvec_ibrs_dump,
++              .extra1         = &zero,
++              .extra2         = &one,
++      },
+ #endif
+       { }
+ };
+@@ -2585,6 +2630,86 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
+                               do_proc_dointvec_minmax_conv, &param);
+ }
+ 
++#ifdef CONFIG_X86
++int proc_dointvec_ibrs_dump(struct ctl_table *table, int write,
++      void __user *buffer, size_t *lenp, loff_t *ppos)
++{
++      int ret;
++      unsigned int cpu;
++
++      ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
++      printk("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
++      printk("use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
++      for_each_online_cpu(cpu) {
++             u64 val;
++
++             if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
++                     rdmsrl_on_cpu(cpu, MSR_IA32_SPEC_CTRL, &val);
++             else
++                     val = 0;
++             printk("read cpu %d ibrs val %lu\n", cpu, (unsigned long) val);
++      }
++      return ret;
++}
++
++int proc_dointvec_ibrs_ctrl(struct ctl_table *table, int write,
++      void __user *buffer, size_t *lenp, loff_t *ppos)
++{
++      int ret;
++      unsigned int cpu;
++
++      ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
++      pr_debug("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
++      pr_debug("before:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
++      if (sysctl_ibrs_enabled == 0) {
++              /* always set IBRS off */
++              set_ibrs_disabled();
++              if (ibrs_supported) {
++                      for_each_online_cpu(cpu)
++                              wrmsrl_on_cpu(cpu, MSR_IA32_SPEC_CTRL, 0x0);
++              }
++      } else if (sysctl_ibrs_enabled == 2) {
++              /* always set IBRS on, even in user space */
++              clear_ibrs_disabled();
++              if (ibrs_supported) {
++                      for_each_online_cpu(cpu)
++                              wrmsrl_on_cpu(cpu, MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
++              } else {
++                      sysctl_ibrs_enabled = 0;
++              }
++      } else if (sysctl_ibrs_enabled == 1) {
++              /* use IBRS in kernel */
++              clear_ibrs_disabled();
++              if (!ibrs_inuse)
++                      /* platform don't support ibrs */
++                      sysctl_ibrs_enabled = 0;
++      }
++      pr_debug("after:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
++      return ret;
++}
++
++int proc_dointvec_ibpb_ctrl(struct ctl_table *table, int write,
++      void __user *buffer, size_t *lenp, loff_t *ppos)
++{
++      int ret;
++
++      ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
++      pr_debug("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
++      pr_debug("before:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
++      if (sysctl_ibpb_enabled == 0)
++              set_ibpb_disabled();
++      else if (sysctl_ibpb_enabled == 1) {
++              clear_ibpb_disabled();
++              if (!ibpb_inuse)
++                      /* platform don't support ibpb */
++                      sysctl_ibpb_enabled = 0;
++      }
++      pr_debug("after:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
++      return ret;
++}
++#endif
++
++
+ struct do_proc_douintvec_minmax_conv_param {
+       unsigned int *min;
+       unsigned int *max;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0288-x86-spec_ctrl-Add-lock-to-serialize-changes-to-ibrs-.patch b/patches/kernel/0288-x86-spec_ctrl-Add-lock-to-serialize-changes-to-ibrs-.patch

new file mode 100644 (file)

index 0000000..ac1928c
--- /dev/null
+++ b/patches/kernel/0288-x86-spec_ctrl-Add-lock-to-serialize-changes-to-ibrs-.patch
@@ -0,0 +1,166 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Mon, 20 Nov 2017 13:47:54 -0800
+Subject: [PATCH] x86/spec_ctrl: Add lock to serialize changes to ibrs and ibpb
+ control
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 85789933bc45a3e763823675bd0d80e3e617f234)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/cpu/intel.c          | 22 ++++++++++++----------
+ arch/x86/kernel/cpu/microcode/core.c |  2 ++
+ kernel/smp.c                         |  4 ++++
+ kernel/sysctl.c                      | 14 +++++++++++++-
+ 4 files changed, 31 insertions(+), 11 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
+index c69ea2efbed1..8d558e24783c 100644
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -628,16 +628,18 @@ static void init_intel(struct cpuinfo_x86 *c)
+ 
+       init_intel_misc_features(c);
+ 
+-      if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
+-                printk_once(KERN_INFO "FEATURE SPEC_CTRL Present\n");
+-              set_ibrs_supported();
+-              set_ibpb_supported();
+-              if (ibrs_inuse)
+-                      sysctl_ibrs_enabled = 1;
+-              if (ibpb_inuse)
+-                      sysctl_ibpb_enabled = 1;
+-        } else {
+-                printk_once(KERN_INFO "FEATURE SPEC_CTRL Not Present\n");
++      if (!c->cpu_index) {
++              if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
++                      printk(KERN_INFO "FEATURE SPEC_CTRL Present\n");
++                      set_ibrs_supported();
++                      set_ibpb_supported();
++                      if (ibrs_inuse)
++                              sysctl_ibrs_enabled = 1;
++                      if (ibpb_inuse)
++                              sysctl_ibpb_enabled = 1;
++              } else {
++                      printk(KERN_INFO "FEATURE SPEC_CTRL Not Present\n");
++              }
+       }
+ }
+ 
+diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
+index 6450aeda72fc..55086921d29e 100644
+--- a/arch/x86/kernel/cpu/microcode/core.c
++++ b/arch/x86/kernel/cpu/microcode/core.c
+@@ -538,12 +538,14 @@ static ssize_t reload_store(struct device *dev,
+ 
+       if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
+               printk_once(KERN_INFO "FEATURE SPEC_CTRL Present\n");
++              mutex_lock(&spec_ctrl_mutex);
+               set_ibrs_supported();
+               set_ibpb_supported();
+               if (ibrs_inuse)
+                       sysctl_ibrs_enabled = 1;
+               if (ibpb_inuse)
+                       sysctl_ibpb_enabled = 1;
++              mutex_unlock(&spec_ctrl_mutex);
+       }
+ 
+       mutex_unlock(&microcode_mutex);
+diff --git a/kernel/smp.c b/kernel/smp.c
+index 3bece045f4a4..a224ec0c540c 100644
+--- a/kernel/smp.c
++++ b/kernel/smp.c
+@@ -519,6 +519,10 @@ int use_ibpb;
+ EXPORT_SYMBOL(use_ibpb);
+ #endif
+ 
++/* mutex to serialize IBRS & IBPB control changes */
++DEFINE_MUTEX(spec_ctrl_mutex);
++EXPORT_SYMBOL(spec_ctrl_mutex);
++
+ /*
+  * Setup routine for controlling SMP activation
+  *
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c
+index 69c37bd6251a..47a37792109d 100644
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -69,6 +69,7 @@
+ #include <linux/mount.h>
+ 
+ #include <linux/uaccess.h>
++#include <linux/mutex.h>
+ #include <asm/processor.h>
+ 
+ #ifdef CONFIG_X86
+@@ -2634,12 +2635,17 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
+ int proc_dointvec_ibrs_dump(struct ctl_table *table, int write,
+       void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+-      int ret;
++      int ret, orig_inuse;
+       unsigned int cpu;
+ 
++
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       printk("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
+       printk("use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
++      mutex_lock(&spec_ctrl_mutex);
++      orig_inuse = use_ibrs;
++      /* temporary halt to ibrs usage to dump ibrs values */
++      clear_ibrs_inuse();
+       for_each_online_cpu(cpu) {
+              u64 val;
+ 
+@@ -2649,6 +2655,8 @@ int proc_dointvec_ibrs_dump(struct ctl_table *table, int write,
+                      val = 0;
+              printk("read cpu %d ibrs val %lu\n", cpu, (unsigned long) val);
+       }
++      use_ibrs = orig_inuse;
++      mutex_unlock(&spec_ctrl_mutex);
+       return ret;
+ }
+ 
+@@ -2661,6 +2669,7 @@ int proc_dointvec_ibrs_ctrl(struct ctl_table *table, int write,
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       pr_debug("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
+       pr_debug("before:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
++      mutex_lock(&spec_ctrl_mutex);
+       if (sysctl_ibrs_enabled == 0) {
+               /* always set IBRS off */
+               set_ibrs_disabled();
+@@ -2684,6 +2693,7 @@ int proc_dointvec_ibrs_ctrl(struct ctl_table *table, int write,
+                       /* platform don't support ibrs */
+                       sysctl_ibrs_enabled = 0;
+       }
++      mutex_unlock(&spec_ctrl_mutex);
+       pr_debug("after:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
+       return ret;
+ }
+@@ -2696,6 +2706,7 @@ int proc_dointvec_ibpb_ctrl(struct ctl_table *table, int write,
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       pr_debug("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
+       pr_debug("before:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
++      mutex_lock(&spec_ctrl_mutex);
+       if (sysctl_ibpb_enabled == 0)
+               set_ibpb_disabled();
+       else if (sysctl_ibpb_enabled == 1) {
+@@ -2704,6 +2715,7 @@ int proc_dointvec_ibpb_ctrl(struct ctl_table *table, int write,
+                       /* platform don't support ibpb */
+                       sysctl_ibpb_enabled = 0;
+       }
++      mutex_unlock(&spec_ctrl_mutex);
+       pr_debug("after:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
+       return ret;
+ }
+-- 
+2.14.2
+
diff --git a/patches/kernel/0288-x86-syscall-Clear-unused-extra-registers-on-syscall-.patch b/patches/kernel/0288-x86-syscall-Clear-unused-extra-registers-on-syscall-.patch

deleted file mode 100644 (file)

index c1ab5a1..0000000
--- a/patches/kernel/0288-x86-syscall-Clear-unused-extra-registers-on-syscall-.patch
+++ /dev/null
@@ -1,94 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Tue, 19 Sep 2017 15:21:40 -0700
-Subject: [PATCH] x86/syscall: Clear unused extra registers on syscall entrance
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-To prevent the unused registers %r12-%r15, %rbp and %rbx from
-being used speculatively, we clear them upon syscall entrance
-for code hygiene.
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 7b5ea16f42b5e4860cf9033897bcdfa3e1209033)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/calling.h  |  9 +++++++++
- arch/x86/entry/entry_64.S | 12 ++++++++----
- 2 files changed, 17 insertions(+), 4 deletions(-)
-
-diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
-index 015e0a84bb99..d537818ad285 100644
---- a/arch/x86/entry/calling.h
-+++ b/arch/x86/entry/calling.h
-@@ -155,6 +155,15 @@ For 32-bit we have the following conventions - kernel is built with
-       popq %rbx
-       .endm
- 
-+      .macro CLEAR_EXTRA_REGS
-+      xorq %r15, %r15
-+      xorq %r14, %r14
-+      xorq %r13, %r13
-+      xorq %r12, %r12
-+      xorq %rbp, %rbp
-+      xorq %rbx, %rbx
-+      .endm
-+
-       .macro POP_C_REGS
-       popq %r11
-       popq %r10
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index f6ec4ad5b114..1118a6256c69 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -231,10 +231,16 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
-       pushq   %r9                             /* pt_regs->r9 */
-       pushq   %r10                            /* pt_regs->r10 */
-       pushq   %r11                            /* pt_regs->r11 */
--      sub     $(6*8), %rsp                    /* pt_regs->bp, bx, r12-15 not saved */
-+      sub     $(6*8), %rsp                    /* pt_regs->bp, bx, r12-15 not used */
-       UNWIND_HINT_REGS extra=0
- 
-       ENABLE_IBRS
-+      /*
-+       * Clear the unused extra regs for code hygiene.
-+       * Will restore the callee saved extra regs at end of syscall.
-+       */
-+      SAVE_EXTRA_REGS
-+      CLEAR_EXTRA_REGS
- 
-       STUFF_RSB
- 
-@@ -292,7 +298,7 @@ entry_SYSCALL_64_fastpath:
-       movq    RIP(%rsp), %rcx
-       movq    EFLAGS(%rsp), %r11
-       DISABLE_IBRS
--      addq    $6*8, %rsp      /* skip extra regs -- they were preserved */
-+      POP_EXTRA_REGS
-       UNWIND_HINT_EMPTY
-       jmp     .Lpop_c_regs_except_rcx_r11_and_sysret
- 
-@@ -304,14 +310,12 @@ entry_SYSCALL_64_fastpath:
-        */
-       TRACE_IRQS_ON
-       ENABLE_INTERRUPTS(CLBR_ANY)
--      SAVE_EXTRA_REGS
-       movq    %rsp, %rdi
-       call    syscall_return_slowpath /* returns with IRQs disabled */
-       jmp     return_from_SYSCALL_64
- 
- entry_SYSCALL64_slow_path:
-       /* IRQs are off. */
--      SAVE_EXTRA_REGS
-       movq    %rsp, %rdi
-       call    do_syscall_64           /* returns with IRQs disabled */
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0289-x86-syscall-Clear-unused-extra-registers-on-32-bit-c.patch b/patches/kernel/0289-x86-syscall-Clear-unused-extra-registers-on-32-bit-c.patch

deleted file mode 100644 (file)

index e6f6cbc..0000000
--- a/patches/kernel/0289-x86-syscall-Clear-unused-extra-registers-on-32-bit-c.patch
+++ /dev/null
@@ -1,101 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Fri, 15 Sep 2017 19:41:24 -0700
-Subject: [PATCH] x86/syscall: Clear unused extra registers on 32-bit
- compatible syscall entrance
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-To prevent the unused registers %r8-%r15, from being used speculatively,
-we clear them upon syscall entrance for code hygiene in 32 bit compatible
-mode.
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 725ad2ef81ccceb3e31a7263faae2059d05e2c48)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/calling.h         | 11 +++++++++++
- arch/x86/entry/entry_64_compat.S | 18 ++++++++++++++----
- 2 files changed, 25 insertions(+), 4 deletions(-)
-
-diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
-index d537818ad285..0e34002bc801 100644
---- a/arch/x86/entry/calling.h
-+++ b/arch/x86/entry/calling.h
-@@ -155,6 +155,17 @@ For 32-bit we have the following conventions - kernel is built with
-       popq %rbx
-       .endm
- 
-+      .macro CLEAR_R8_TO_R15
-+      xorq %r15, %r15
-+      xorq %r14, %r14
-+      xorq %r13, %r13
-+      xorq %r12, %r12
-+      xorq %r11, %r11
-+      xorq %r10, %r10
-+      xorq %r9, %r9
-+      xorq %r8, %r8
-+      .endm
-+
-       .macro CLEAR_EXTRA_REGS
-       xorq %r15, %r15
-       xorq %r14, %r14
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
-index 1480222bae02..8d7ae9657375 100644
---- a/arch/x86/entry/entry_64_compat.S
-+++ b/arch/x86/entry/entry_64_compat.S
-@@ -99,6 +99,8 @@ ENTRY(entry_SYSENTER_compat)
-       ENABLE_IBRS
-       STUFF_RSB
- 
-+      CLEAR_R8_TO_R15
-+
-       /*
-        * SYSENTER doesn't filter flags, so we need to clear NT and AC
-        * ourselves.  To save a few cycles, we can check whether
-@@ -223,10 +225,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
-       pushq   $0                      /* pt_regs->r11 = 0 */
-       pushq   %rbx                    /* pt_regs->rbx */
-       pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
--      pushq   $0                      /* pt_regs->r12 = 0 */
--      pushq   $0                      /* pt_regs->r13 = 0 */
--      pushq   $0                      /* pt_regs->r14 = 0 */
--      pushq   $0                      /* pt_regs->r15 = 0 */
-+      pushq   %r12                    /* pt_regs->r12 */
-+      pushq   %r13                    /* pt_regs->r13 */
-+      pushq   %r14                    /* pt_regs->r14 */
-+      pushq   %r15                    /* pt_regs->r15 */
-+
-+      CLEAR_R8_TO_R15
- 
-       STUFF_RSB
- 
-@@ -245,6 +249,10 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
-       /* Opportunistic SYSRET */
- sysret32_from_system_call:
-       TRACE_IRQS_ON                   /* User mode traces as IRQs on. */
-+      movq    R15(%rsp), %r15         /* pt_regs->r15 */
-+      movq    R14(%rsp), %r14         /* pt_regs->r14 */
-+      movq    R13(%rsp), %r13         /* pt_regs->r13 */
-+      movq    R12(%rsp), %r12         /* pt_regs->r12 */
-       movq    RBX(%rsp), %rbx         /* pt_regs->rbx */
-       movq    RBP(%rsp), %rbp         /* pt_regs->rbp */
-       movq    EFLAGS(%rsp), %r11      /* pt_regs->flags (in r11) */
-@@ -359,6 +367,8 @@ ENTRY(entry_INT80_compat)
-       ENABLE_IBRS
-       STUFF_RSB
- 
-+      CLEAR_R8_TO_R15
-+
-       /*
-        * User mode is traced as though IRQs are on, and the interrupt
-        * gate turned them off.
--- 
-2.14.2
-
diff --git a/patches/kernel/0289-x86-syscall-Clear-unused-extra-registers-on-syscall-.patch b/patches/kernel/0289-x86-syscall-Clear-unused-extra-registers-on-syscall-.patch

new file mode 100644 (file)

index 0000000..c1ab5a1
--- /dev/null
+++ b/patches/kernel/0289-x86-syscall-Clear-unused-extra-registers-on-syscall-.patch
@@ -0,0 +1,94 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Tue, 19 Sep 2017 15:21:40 -0700
+Subject: [PATCH] x86/syscall: Clear unused extra registers on syscall entrance
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+To prevent the unused registers %r12-%r15, %rbp and %rbx from
+being used speculatively, we clear them upon syscall entrance
+for code hygiene.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 7b5ea16f42b5e4860cf9033897bcdfa3e1209033)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/calling.h  |  9 +++++++++
+ arch/x86/entry/entry_64.S | 12 ++++++++----
+ 2 files changed, 17 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
+index 015e0a84bb99..d537818ad285 100644
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -155,6 +155,15 @@ For 32-bit we have the following conventions - kernel is built with
+       popq %rbx
+       .endm
+ 
++      .macro CLEAR_EXTRA_REGS
++      xorq %r15, %r15
++      xorq %r14, %r14
++      xorq %r13, %r13
++      xorq %r12, %r12
++      xorq %rbp, %rbp
++      xorq %rbx, %rbx
++      .endm
++
+       .macro POP_C_REGS
+       popq %r11
+       popq %r10
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index f6ec4ad5b114..1118a6256c69 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -231,10 +231,16 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
+       pushq   %r9                             /* pt_regs->r9 */
+       pushq   %r10                            /* pt_regs->r10 */
+       pushq   %r11                            /* pt_regs->r11 */
+-      sub     $(6*8), %rsp                    /* pt_regs->bp, bx, r12-15 not saved */
++      sub     $(6*8), %rsp                    /* pt_regs->bp, bx, r12-15 not used */
+       UNWIND_HINT_REGS extra=0
+ 
+       ENABLE_IBRS
++      /*
++       * Clear the unused extra regs for code hygiene.
++       * Will restore the callee saved extra regs at end of syscall.
++       */
++      SAVE_EXTRA_REGS
++      CLEAR_EXTRA_REGS
+ 
+       STUFF_RSB
+ 
+@@ -292,7 +298,7 @@ entry_SYSCALL_64_fastpath:
+       movq    RIP(%rsp), %rcx
+       movq    EFLAGS(%rsp), %r11
+       DISABLE_IBRS
+-      addq    $6*8, %rsp      /* skip extra regs -- they were preserved */
++      POP_EXTRA_REGS
+       UNWIND_HINT_EMPTY
+       jmp     .Lpop_c_regs_except_rcx_r11_and_sysret
+ 
+@@ -304,14 +310,12 @@ entry_SYSCALL_64_fastpath:
+        */
+       TRACE_IRQS_ON
+       ENABLE_INTERRUPTS(CLBR_ANY)
+-      SAVE_EXTRA_REGS
+       movq    %rsp, %rdi
+       call    syscall_return_slowpath /* returns with IRQs disabled */
+       jmp     return_from_SYSCALL_64
+ 
+ entry_SYSCALL64_slow_path:
+       /* IRQs are off. */
+-      SAVE_EXTRA_REGS
+       movq    %rsp, %rdi
+       call    do_syscall_64           /* returns with IRQs disabled */
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0290-x86-entry-Use-retpoline-for-syscall-s-indirect-calls.patch b/patches/kernel/0290-x86-entry-Use-retpoline-for-syscall-s-indirect-calls.patch

deleted file mode 100644 (file)

index 831c137..0000000
--- a/patches/kernel/0290-x86-entry-Use-retpoline-for-syscall-s-indirect-calls.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tim Chen <tim.c.chen@linux.intel.com>
-Date: Wed, 8 Nov 2017 16:30:06 -0800
-Subject: [PATCH] x86/entry: Use retpoline for syscall's indirect calls
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit d2e0236f395e876f5303fb5021e4fe6eea881402)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/entry/entry_64.S | 10 +++++++++-
- 1 file changed, 9 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
-index 1118a6256c69..be7196967f9f 100644
---- a/arch/x86/entry/entry_64.S
-+++ b/arch/x86/entry/entry_64.S
-@@ -276,7 +276,15 @@ entry_SYSCALL_64_fastpath:
-        * It might end up jumping to the slow path.  If it jumps, RAX
-        * and all argument registers are clobbered.
-        */
--      call    *sys_call_table(, %rax, 8)
-+      movq    sys_call_table(, %rax, 8), %r10
-+      jmp     1f
-+4:    callq   2f
-+3:    nop
-+      jmp     3b
-+2:    mov     %r10, (%rsp)
-+      retq
-+1:    callq   4b
-+
- .Lentry_SYSCALL_64_after_fastpath_call:
- 
-       movq    %rax, RAX(%rsp)
--- 
-2.14.2
-
diff --git a/patches/kernel/0290-x86-syscall-Clear-unused-extra-registers-on-32-bit-c.patch b/patches/kernel/0290-x86-syscall-Clear-unused-extra-registers-on-32-bit-c.patch

new file mode 100644 (file)

index 0000000..e6f6cbc
--- /dev/null
+++ b/patches/kernel/0290-x86-syscall-Clear-unused-extra-registers-on-32-bit-c.patch
@@ -0,0 +1,101 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Fri, 15 Sep 2017 19:41:24 -0700
+Subject: [PATCH] x86/syscall: Clear unused extra registers on 32-bit
+ compatible syscall entrance
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+To prevent the unused registers %r8-%r15, from being used speculatively,
+we clear them upon syscall entrance for code hygiene in 32 bit compatible
+mode.
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 725ad2ef81ccceb3e31a7263faae2059d05e2c48)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/calling.h         | 11 +++++++++++
+ arch/x86/entry/entry_64_compat.S | 18 ++++++++++++++----
+ 2 files changed, 25 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
+index d537818ad285..0e34002bc801 100644
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -155,6 +155,17 @@ For 32-bit we have the following conventions - kernel is built with
+       popq %rbx
+       .endm
+ 
++      .macro CLEAR_R8_TO_R15
++      xorq %r15, %r15
++      xorq %r14, %r14
++      xorq %r13, %r13
++      xorq %r12, %r12
++      xorq %r11, %r11
++      xorq %r10, %r10
++      xorq %r9, %r9
++      xorq %r8, %r8
++      .endm
++
+       .macro CLEAR_EXTRA_REGS
+       xorq %r15, %r15
+       xorq %r14, %r14
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index 1480222bae02..8d7ae9657375 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -99,6 +99,8 @@ ENTRY(entry_SYSENTER_compat)
+       ENABLE_IBRS
+       STUFF_RSB
+ 
++      CLEAR_R8_TO_R15
++
+       /*
+        * SYSENTER doesn't filter flags, so we need to clear NT and AC
+        * ourselves.  To save a few cycles, we can check whether
+@@ -223,10 +225,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
+       pushq   $0                      /* pt_regs->r11 = 0 */
+       pushq   %rbx                    /* pt_regs->rbx */
+       pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
+-      pushq   $0                      /* pt_regs->r12 = 0 */
+-      pushq   $0                      /* pt_regs->r13 = 0 */
+-      pushq   $0                      /* pt_regs->r14 = 0 */
+-      pushq   $0                      /* pt_regs->r15 = 0 */
++      pushq   %r12                    /* pt_regs->r12 */
++      pushq   %r13                    /* pt_regs->r13 */
++      pushq   %r14                    /* pt_regs->r14 */
++      pushq   %r15                    /* pt_regs->r15 */
++
++      CLEAR_R8_TO_R15
+ 
+       STUFF_RSB
+ 
+@@ -245,6 +249,10 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
+       /* Opportunistic SYSRET */
+ sysret32_from_system_call:
+       TRACE_IRQS_ON                   /* User mode traces as IRQs on. */
++      movq    R15(%rsp), %r15         /* pt_regs->r15 */
++      movq    R14(%rsp), %r14         /* pt_regs->r14 */
++      movq    R13(%rsp), %r13         /* pt_regs->r13 */
++      movq    R12(%rsp), %r12         /* pt_regs->r12 */
+       movq    RBX(%rsp), %rbx         /* pt_regs->rbx */
+       movq    RBP(%rsp), %rbp         /* pt_regs->rbp */
+       movq    EFLAGS(%rsp), %r11      /* pt_regs->flags (in r11) */
+@@ -359,6 +367,8 @@ ENTRY(entry_INT80_compat)
+       ENABLE_IBRS
+       STUFF_RSB
+ 
++      CLEAR_R8_TO_R15
++
+       /*
+        * User mode is traced as though IRQs are on, and the interrupt
+        * gate turned them off.
+-- 
+2.14.2
+
diff --git a/patches/kernel/0291-x86-cpu-AMD-Add-speculative-control-support-for-AMD.patch b/patches/kernel/0291-x86-cpu-AMD-Add-speculative-control-support-for-AMD.patch

deleted file mode 100644 (file)

index febc693..0000000
--- a/patches/kernel/0291-x86-cpu-AMD-Add-speculative-control-support-for-AMD.patch
+++ /dev/null
@@ -1,112 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tom Lendacky <thomas.lendacky@amd.com>
-Date: Wed, 20 Dec 2017 10:52:54 +0000
-Subject: [PATCH] x86/cpu/AMD: Add speculative control support for AMD
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Add speculative control support for AMD processors. For AMD, speculative
-control is indicated as follows:
-
-  CPUID EAX=0x00000007, ECX=0x00 return EDX[26] indicates support for
-  both IBRS and IBPB.
-
-  CPUID EAX=0x80000008, ECX=0x00 return EBX[12] indicates support for
-  just IBPB.
-
-On AMD family 0x10, 0x12 and 0x16 processors where either of the above
-features are not supported, IBPB can be achieved by disabling
-indirect branch predictor support in MSR 0xc0011021[14] at boot.
-
-Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 8c3fc9e98177daee2281ed40e3d61f9cf4eee576)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/cpufeatures.h |  1 +
- arch/x86/include/asm/msr-index.h   |  1 +
- arch/x86/kernel/cpu/amd.c          | 39 ++++++++++++++++++++++++++++++++++++++
- 3 files changed, 41 insertions(+)
-
-diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
-index 44be8fd069bf..a97b327137aa 100644
---- a/arch/x86/include/asm/cpufeatures.h
-+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -268,6 +268,7 @@
- #define X86_FEATURE_CLZERO            (13*32+ 0) /* CLZERO instruction */
- #define X86_FEATURE_IRPERF            (13*32+ 1) /* Instructions Retired Count */
- #define X86_FEATURE_XSAVEERPTR                (13*32+ 2) /* Always save/restore FP error pointers */
-+#define X86_FEATURE_IBPB              (13*32+12) /* Indirect Branch Prediction Barrier */
- 
- /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
- #define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
-diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
-index 4e3438a00a50..954aad6c32f4 100644
---- a/arch/x86/include/asm/msr-index.h
-+++ b/arch/x86/include/asm/msr-index.h
-@@ -345,6 +345,7 @@
- #define MSR_F15H_NB_PERF_CTR          0xc0010241
- #define MSR_F15H_PTSC                 0xc0010280
- #define MSR_F15H_IC_CFG                       0xc0011021
-+#define MSR_F15H_IC_CFG_DIS_IND               BIT_ULL(14)
- 
- /* Fam 10h MSRs */
- #define MSR_FAM10H_MMIO_CONF_BASE     0xc0010058
-diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
-index 99eef4a09fd9..42871c1a8da8 100644
---- a/arch/x86/kernel/cpu/amd.c
-+++ b/arch/x86/kernel/cpu/amd.c
-@@ -830,6 +830,45 @@ static void init_amd(struct cpuinfo_x86 *c)
-       /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
-       if (!cpu_has(c, X86_FEATURE_XENPV))
-               set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
-+
-+      /* AMD speculative control support */
-+      if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
-+              pr_info_once("FEATURE SPEC_CTRL Present\n");
-+              set_ibrs_supported();
-+              set_ibpb_supported();
-+              if (ibrs_inuse)
-+                      sysctl_ibrs_enabled = 1;
-+              if (ibpb_inuse)
-+                      sysctl_ibpb_enabled = 1;
-+      } else if (cpu_has(c, X86_FEATURE_IBPB)) {
-+              pr_info_once("FEATURE SPEC_CTRL Not Present\n");
-+              pr_info_once("FEATURE IBPB Present\n");
-+              set_ibpb_supported();
-+              if (ibpb_inuse)
-+                      sysctl_ibpb_enabled = 1;
-+      } else {
-+              pr_info_once("FEATURE SPEC_CTRL Not Present\n");
-+              pr_info_once("FEATURE IBPB Not Present\n");
-+              /*
-+               * On AMD processors that do not support the speculative
-+               * control features, IBPB type support can be achieved by
-+               * disabling indirect branch predictor support.
-+               */
-+              if (!ibpb_disabled) {
-+                      u64 val;
-+
-+                      switch (c->x86) {
-+                      case 0x10:
-+                      case 0x12:
-+                      case 0x16:
-+                              pr_info_once("Disabling indirect branch predictor support\n");
-+                              rdmsrl(MSR_F15H_IC_CFG, val);
-+                              val |= MSR_F15H_IC_CFG_DIS_IND;
-+                              wrmsrl(MSR_F15H_IC_CFG, val);
-+                              break;
-+                      }
-+              }
-+      }
- }
- 
- #ifdef CONFIG_X86_32
--- 
-2.14.2
-
diff --git a/patches/kernel/0291-x86-entry-Use-retpoline-for-syscall-s-indirect-calls.patch b/patches/kernel/0291-x86-entry-Use-retpoline-for-syscall-s-indirect-calls.patch

new file mode 100644 (file)

index 0000000..831c137
--- /dev/null
+++ b/patches/kernel/0291-x86-entry-Use-retpoline-for-syscall-s-indirect-calls.patch
@@ -0,0 +1,44 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chen <tim.c.chen@linux.intel.com>
+Date: Wed, 8 Nov 2017 16:30:06 -0800
+Subject: [PATCH] x86/entry: Use retpoline for syscall's indirect calls
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit d2e0236f395e876f5303fb5021e4fe6eea881402)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/entry/entry_64.S | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 1118a6256c69..be7196967f9f 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -276,7 +276,15 @@ entry_SYSCALL_64_fastpath:
+        * It might end up jumping to the slow path.  If it jumps, RAX
+        * and all argument registers are clobbered.
+        */
+-      call    *sys_call_table(, %rax, 8)
++      movq    sys_call_table(, %rax, 8), %r10
++      jmp     1f
++4:    callq   2f
++3:    nop
++      jmp     3b
++2:    mov     %r10, (%rsp)
++      retq
++1:    callq   4b
++
+ .Lentry_SYSCALL_64_after_fastpath_call:
+ 
+       movq    %rax, RAX(%rsp)
+-- 
+2.14.2
+
diff --git a/patches/kernel/0292-x86-cpu-AMD-Add-speculative-control-support-for-AMD.patch b/patches/kernel/0292-x86-cpu-AMD-Add-speculative-control-support-for-AMD.patch

new file mode 100644 (file)

index 0000000..febc693
--- /dev/null
+++ b/patches/kernel/0292-x86-cpu-AMD-Add-speculative-control-support-for-AMD.patch
@@ -0,0 +1,112 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Wed, 20 Dec 2017 10:52:54 +0000
+Subject: [PATCH] x86/cpu/AMD: Add speculative control support for AMD
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Add speculative control support for AMD processors. For AMD, speculative
+control is indicated as follows:
+
+  CPUID EAX=0x00000007, ECX=0x00 return EDX[26] indicates support for
+  both IBRS and IBPB.
+
+  CPUID EAX=0x80000008, ECX=0x00 return EBX[12] indicates support for
+  just IBPB.
+
+On AMD family 0x10, 0x12 and 0x16 processors where neither of the above
+features is supported, IBPB can be achieved by disabling
+indirect branch predictor support in MSR 0xc0011021[14] at boot.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 8c3fc9e98177daee2281ed40e3d61f9cf4eee576)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/cpufeatures.h |  1 +
+ arch/x86/include/asm/msr-index.h   |  1 +
+ arch/x86/kernel/cpu/amd.c          | 39 ++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 41 insertions(+)
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 44be8fd069bf..a97b327137aa 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -268,6 +268,7 @@
+ #define X86_FEATURE_CLZERO            (13*32+ 0) /* CLZERO instruction */
+ #define X86_FEATURE_IRPERF            (13*32+ 1) /* Instructions Retired Count */
+ #define X86_FEATURE_XSAVEERPTR                (13*32+ 2) /* Always save/restore FP error pointers */
++#define X86_FEATURE_IBPB              (13*32+12) /* Indirect Branch Prediction Barrier */
+ 
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+ #define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index 4e3438a00a50..954aad6c32f4 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -345,6 +345,7 @@
+ #define MSR_F15H_NB_PERF_CTR          0xc0010241
+ #define MSR_F15H_PTSC                 0xc0010280
+ #define MSR_F15H_IC_CFG                       0xc0011021
++#define MSR_F15H_IC_CFG_DIS_IND               BIT_ULL(14)
+ 
+ /* Fam 10h MSRs */
+ #define MSR_FAM10H_MMIO_CONF_BASE     0xc0010058
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index 99eef4a09fd9..42871c1a8da8 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -830,6 +830,45 @@ static void init_amd(struct cpuinfo_x86 *c)
+       /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
+       if (!cpu_has(c, X86_FEATURE_XENPV))
+               set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
++
++      /* AMD speculative control support */
++      if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
++              pr_info_once("FEATURE SPEC_CTRL Present\n");
++              set_ibrs_supported();
++              set_ibpb_supported();
++              if (ibrs_inuse)
++                      sysctl_ibrs_enabled = 1;
++              if (ibpb_inuse)
++                      sysctl_ibpb_enabled = 1;
++      } else if (cpu_has(c, X86_FEATURE_IBPB)) {
++              pr_info_once("FEATURE SPEC_CTRL Not Present\n");
++              pr_info_once("FEATURE IBPB Present\n");
++              set_ibpb_supported();
++              if (ibpb_inuse)
++                      sysctl_ibpb_enabled = 1;
++      } else {
++              pr_info_once("FEATURE SPEC_CTRL Not Present\n");
++              pr_info_once("FEATURE IBPB Not Present\n");
++              /*
++               * On AMD processors that do not support the speculative
++               * control features, IBPB type support can be achieved by
++               * disabling indirect branch predictor support.
++               */
++              if (!ibpb_disabled) {
++                      u64 val;
++
++                      switch (c->x86) {
++                      case 0x10:
++                      case 0x12:
++                      case 0x16:
++                              pr_info_once("Disabling indirect branch predictor support\n");
++                              rdmsrl(MSR_F15H_IC_CFG, val);
++                              val |= MSR_F15H_IC_CFG_DIS_IND;
++                              wrmsrl(MSR_F15H_IC_CFG, val);
++                              break;
++                      }
++              }
++      }
+ }
+ 
+ #ifdef CONFIG_X86_32
+-- 
+2.14.2
+
diff --git a/patches/kernel/0292-x86-microcode-Extend-post-microcode-reload-to-suppor.patch b/patches/kernel/0292-x86-microcode-Extend-post-microcode-reload-to-suppor.patch

deleted file mode 100644 (file)

index eef6684..0000000
--- a/patches/kernel/0292-x86-microcode-Extend-post-microcode-reload-to-suppor.patch
+++ /dev/null
@@ -1,45 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tom Lendacky <thomas.lendacky@amd.com>
-Date: Wed, 20 Dec 2017 10:55:47 +0000
-Subject: [PATCH] x86/microcode: Extend post microcode reload to support IBPB
- feature
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Add an IBPB feature check to the speculative control update check after
-a microcode reload.
-
-Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 099878acd3738271fb2ade01f4649b1ed2fb72d5)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/cpu/microcode/core.c | 7 +++++++
- 1 file changed, 7 insertions(+)
-
-diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
-index 55086921d29e..638c08350d65 100644
---- a/arch/x86/kernel/cpu/microcode/core.c
-+++ b/arch/x86/kernel/cpu/microcode/core.c
-@@ -546,6 +546,13 @@ static ssize_t reload_store(struct device *dev,
-               if (ibpb_inuse)
-                       sysctl_ibpb_enabled = 1;
-               mutex_unlock(&spec_ctrl_mutex);
-+      } else if (boot_cpu_has(X86_FEATURE_IBPB)) {
-+              printk_once(KERN_INFO "FEATURE IBPB Present\n");
-+              mutex_lock(&spec_ctrl_mutex);
-+              set_ibpb_supported();
-+              if (ibpb_inuse)
-+                      sysctl_ibpb_enabled = 1;
-+              mutex_unlock(&spec_ctrl_mutex);
-       }
- 
-       mutex_unlock(&microcode_mutex);
--- 
-2.14.2
-
diff --git a/patches/kernel/0293-KVM-SVM-Do-not-intercept-new-speculative-control-MSR.patch b/patches/kernel/0293-KVM-SVM-Do-not-intercept-new-speculative-control-MSR.patch

deleted file mode 100644 (file)

index b57fe4e..0000000
--- a/patches/kernel/0293-KVM-SVM-Do-not-intercept-new-speculative-control-MSR.patch
+++ /dev/null
@@ -1,40 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tom Lendacky <thomas.lendacky@amd.com>
-Date: Wed, 20 Dec 2017 10:55:47 +0000
-Subject: [PATCH] KVM: SVM: Do not intercept new speculative control MSRs
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Allow guest access to the speculative control MSRs without being
-intercepted.
-
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit ccaa77a824fd3e21f0b8ae6b5a66fc1ee7e35b14)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kvm/svm.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
-index 92cd94d51e1f..94adf6becc2e 100644
---- a/arch/x86/kvm/svm.c
-+++ b/arch/x86/kvm/svm.c
-@@ -248,6 +248,8 @@ static const struct svm_direct_access_msrs {
-       { .index = MSR_CSTAR,                           .always = true  },
-       { .index = MSR_SYSCALL_MASK,                    .always = true  },
- #endif
-+      { .index = MSR_IA32_SPEC_CTRL,                  .always = true },
-+      { .index = MSR_IA32_PRED_CMD,                   .always = true },
-       { .index = MSR_IA32_LASTBRANCHFROMIP,           .always = false },
-       { .index = MSR_IA32_LASTBRANCHTOIP,             .always = false },
-       { .index = MSR_IA32_LASTINTFROMIP,              .always = false },
--- 
-2.14.2
-
diff --git a/patches/kernel/0293-x86-microcode-Extend-post-microcode-reload-to-suppor.patch b/patches/kernel/0293-x86-microcode-Extend-post-microcode-reload-to-suppor.patch

new file mode 100644 (file)

index 0000000..eef6684
--- /dev/null
+++ b/patches/kernel/0293-x86-microcode-Extend-post-microcode-reload-to-suppor.patch
@@ -0,0 +1,45 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Wed, 20 Dec 2017 10:55:47 +0000
+Subject: [PATCH] x86/microcode: Extend post microcode reload to support IBPB
+ feature
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Add an IBPB feature check to the speculative control update check after
+a microcode reload.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 099878acd3738271fb2ade01f4649b1ed2fb72d5)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/cpu/microcode/core.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
+index 55086921d29e..638c08350d65 100644
+--- a/arch/x86/kernel/cpu/microcode/core.c
++++ b/arch/x86/kernel/cpu/microcode/core.c
+@@ -546,6 +546,13 @@ static ssize_t reload_store(struct device *dev,
+               if (ibpb_inuse)
+                       sysctl_ibpb_enabled = 1;
+               mutex_unlock(&spec_ctrl_mutex);
++      } else if (boot_cpu_has(X86_FEATURE_IBPB)) {
++              printk_once(KERN_INFO "FEATURE IBPB Present\n");
++              mutex_lock(&spec_ctrl_mutex);
++              set_ibpb_supported();
++              if (ibpb_inuse)
++                      sysctl_ibpb_enabled = 1;
++              mutex_unlock(&spec_ctrl_mutex);
+       }
+ 
+       mutex_unlock(&microcode_mutex);
+-- 
+2.14.2
+
diff --git a/patches/kernel/0294-KVM-SVM-Do-not-intercept-new-speculative-control-MSR.patch b/patches/kernel/0294-KVM-SVM-Do-not-intercept-new-speculative-control-MSR.patch

new file mode 100644 (file)

index 0000000..b57fe4e
--- /dev/null
+++ b/patches/kernel/0294-KVM-SVM-Do-not-intercept-new-speculative-control-MSR.patch
@@ -0,0 +1,40 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Wed, 20 Dec 2017 10:55:47 +0000
+Subject: [PATCH] KVM: SVM: Do not intercept new speculative control MSRs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Allow guest access to the speculative control MSRs without being
+intercepted.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit ccaa77a824fd3e21f0b8ae6b5a66fc1ee7e35b14)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/svm.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index 92cd94d51e1f..94adf6becc2e 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -248,6 +248,8 @@ static const struct svm_direct_access_msrs {
+       { .index = MSR_CSTAR,                           .always = true  },
+       { .index = MSR_SYSCALL_MASK,                    .always = true  },
+ #endif
++      { .index = MSR_IA32_SPEC_CTRL,                  .always = true },
++      { .index = MSR_IA32_PRED_CMD,                   .always = true },
+       { .index = MSR_IA32_LASTBRANCHFROMIP,           .always = false },
+       { .index = MSR_IA32_LASTBRANCHTOIP,             .always = false },
+       { .index = MSR_IA32_LASTINTFROMIP,              .always = false },
+-- 
+2.14.2
+
diff --git a/patches/kernel/0294-x86-svm-Set-IBRS-value-on-VM-entry-and-exit.patch b/patches/kernel/0294-x86-svm-Set-IBRS-value-on-VM-entry-and-exit.patch

deleted file mode 100644 (file)

index 880d9b4..0000000
--- a/patches/kernel/0294-x86-svm-Set-IBRS-value-on-VM-entry-and-exit.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tom Lendacky <thomas.lendacky@amd.com>
-Date: Wed, 20 Dec 2017 10:55:47 +0000
-Subject: [PATCH] x86/svm: Set IBRS value on VM entry and exit
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Set/restore the guests IBRS value on VM entry. On VM exit back to the
-kernel save the guest IBRS value and then set IBRS to 1.
-
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 72f71e6826fac9a656c3994fb6f979cd65a14c64)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kvm/svm.c | 17 +++++++++++++++++
- 1 file changed, 17 insertions(+)
-
-diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
-index 94adf6becc2e..a1b19e810c49 100644
---- a/arch/x86/kvm/svm.c
-+++ b/arch/x86/kvm/svm.c
-@@ -175,6 +175,8 @@ struct vcpu_svm {
- 
-       u64 next_rip;
- 
-+      u64 spec_ctrl;
-+
-       u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
-       struct {
-               u16 fs;
-@@ -3547,6 +3549,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
-       case MSR_VM_CR:
-               msr_info->data = svm->nested.vm_cr_msr;
-               break;
-+      case MSR_IA32_SPEC_CTRL:
-+              msr_info->data = svm->spec_ctrl;
-+              break;
-       case MSR_IA32_UCODE_REV:
-               msr_info->data = 0x01000065;
-               break;
-@@ -3702,6 +3707,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
-       case MSR_VM_IGNNE:
-               vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
-               break;
-+      case MSR_IA32_SPEC_CTRL:
-+              svm->spec_ctrl = data;
-+              break;
-       case MSR_IA32_APICBASE:
-               if (kvm_vcpu_apicv_active(vcpu))
-                       avic_update_vapic_bar(to_svm(vcpu), data);
-@@ -4883,6 +4891,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
- 
-       local_irq_enable();
- 
-+      if (ibrs_inuse && (svm->spec_ctrl != FEATURE_ENABLE_IBRS))
-+              wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
-+
-       asm volatile (
-               "push %%" _ASM_BP "; \n\t"
-               "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
-@@ -4975,6 +4986,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
- #endif
-               );
- 
-+      if (ibrs_inuse) {
-+              rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
-+              if (svm->spec_ctrl != FEATURE_ENABLE_IBRS)
-+                      wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
-+      }
-+
- #ifdef CONFIG_X86_64
-       wrmsrl(MSR_GS_BASE, svm->host.gs_base);
- #else
--- 
-2.14.2
-
diff --git a/patches/kernel/0295-x86-svm-Set-IBPB-when-running-a-different-VCPU.patch b/patches/kernel/0295-x86-svm-Set-IBPB-when-running-a-different-VCPU.patch

deleted file mode 100644 (file)

index 9b2262c..0000000
--- a/patches/kernel/0295-x86-svm-Set-IBPB-when-running-a-different-VCPU.patch
+++ /dev/null
@@ -1,73 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tom Lendacky <thomas.lendacky@amd.com>
-Date: Wed, 20 Dec 2017 10:55:47 +0000
-Subject: [PATCH] x86/svm: Set IBPB when running a different VCPU
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Set IBPB (Indirect Branch Prediction Barrier) when the current CPU is
-going to run a VCPU different from what was previously run.
-
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 0ba3eaabbb6666ebd344ee80534e58c375a00810)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kvm/svm.c | 16 ++++++++++++++++
- 1 file changed, 16 insertions(+)
-
-diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
-index a1b19e810c49..fade4869856a 100644
---- a/arch/x86/kvm/svm.c
-+++ b/arch/x86/kvm/svm.c
-@@ -518,6 +518,8 @@ struct svm_cpu_data {
-       struct kvm_ldttss_desc *tss_desc;
- 
-       struct page *save_area;
-+
-+      struct vmcb *current_vmcb;
- };
- 
- static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
-@@ -1685,11 +1687,19 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
-       __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
-       kvm_vcpu_uninit(vcpu);
-       kmem_cache_free(kvm_vcpu_cache, svm);
-+
-+      /*
-+       * The VMCB could be recycled, causing a false negative in svm_vcpu_load;
-+       * block speculative execution.
-+       */
-+      if (ibpb_inuse)
-+              wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
- }
- 
- static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
- {
-       struct vcpu_svm *svm = to_svm(vcpu);
-+      struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
-       int i;
- 
-       if (unlikely(cpu != vcpu->cpu)) {
-@@ -1718,6 +1728,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-       if (static_cpu_has(X86_FEATURE_RDTSCP))
-               wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
- 
-+      if (sd->current_vmcb != svm->vmcb) {
-+              sd->current_vmcb = svm->vmcb;
-+              if (ibpb_inuse)
-+                      wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
-+      }
-+
-       avic_vcpu_load(vcpu, cpu);
- }
- 
--- 
-2.14.2
-
diff --git a/patches/kernel/0295-x86-svm-Set-IBRS-value-on-VM-entry-and-exit.patch b/patches/kernel/0295-x86-svm-Set-IBRS-value-on-VM-entry-and-exit.patch

new file mode 100644 (file)

index 0000000..880d9b4
--- /dev/null
+++ b/patches/kernel/0295-x86-svm-Set-IBRS-value-on-VM-entry-and-exit.patch
@@ -0,0 +1,83 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Wed, 20 Dec 2017 10:55:47 +0000
+Subject: [PATCH] x86/svm: Set IBRS value on VM entry and exit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Set/restore the guest's IBRS value on VM entry. On VM exit back to the
+kernel save the guest IBRS value and then set IBRS to 1.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 72f71e6826fac9a656c3994fb6f979cd65a14c64)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/svm.c | 17 +++++++++++++++++
+ 1 file changed, 17 insertions(+)
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index 94adf6becc2e..a1b19e810c49 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -175,6 +175,8 @@ struct vcpu_svm {
+ 
+       u64 next_rip;
+ 
++      u64 spec_ctrl;
++
+       u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
+       struct {
+               u16 fs;
+@@ -3547,6 +3549,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+       case MSR_VM_CR:
+               msr_info->data = svm->nested.vm_cr_msr;
+               break;
++      case MSR_IA32_SPEC_CTRL:
++              msr_info->data = svm->spec_ctrl;
++              break;
+       case MSR_IA32_UCODE_REV:
+               msr_info->data = 0x01000065;
+               break;
+@@ -3702,6 +3707,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
+       case MSR_VM_IGNNE:
+               vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
+               break;
++      case MSR_IA32_SPEC_CTRL:
++              svm->spec_ctrl = data;
++              break;
+       case MSR_IA32_APICBASE:
+               if (kvm_vcpu_apicv_active(vcpu))
+                       avic_update_vapic_bar(to_svm(vcpu), data);
+@@ -4883,6 +4891,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+ 
+       local_irq_enable();
+ 
++      if (ibrs_inuse && (svm->spec_ctrl != FEATURE_ENABLE_IBRS))
++              wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
++
+       asm volatile (
+               "push %%" _ASM_BP "; \n\t"
+               "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
+@@ -4975,6 +4986,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+ #endif
+               );
+ 
++      if (ibrs_inuse) {
++              rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
++              if (svm->spec_ctrl != FEATURE_ENABLE_IBRS)
++                      wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
++      }
++
+ #ifdef CONFIG_X86_64
+       wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+ #else
+-- 
+2.14.2
+
diff --git a/patches/kernel/0296-KVM-x86-Add-speculative-control-CPUID-support-for-gu.patch b/patches/kernel/0296-KVM-x86-Add-speculative-control-CPUID-support-for-gu.patch

deleted file mode 100644 (file)

index 8537b7c..0000000
--- a/patches/kernel/0296-KVM-x86-Add-speculative-control-CPUID-support-for-gu.patch
+++ /dev/null
@@ -1,63 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tom Lendacky <thomas.lendacky@amd.com>
-Date: Wed, 20 Dec 2017 10:55:47 +0000
-Subject: [PATCH] KVM: x86: Add speculative control CPUID support for guests
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Provide the guest with the speculative control CPUID related values.
-
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit db7641e5f41cd517c4181ce90c4f9ecc93af4b2b)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kvm/cpuid.c | 12 ++++++++++--
- 1 file changed, 10 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
-index 19adbb418443..f64502d21a89 100644
---- a/arch/x86/kvm/cpuid.c
-+++ b/arch/x86/kvm/cpuid.c
-@@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void)
- /* These are scattered features in cpufeatures.h. */
- #define KVM_CPUID_BIT_AVX512_4VNNIW     2
- #define KVM_CPUID_BIT_AVX512_4FMAPS     3
-+#define KVM_CPUID_BIT_SPEC_CTRL               26
- #define KF(x) bit(KVM_CPUID_BIT_##x)
- 
- int kvm_update_cpuid(struct kvm_vcpu *vcpu)
-@@ -387,7 +388,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
- 
-       /* cpuid 7.0.edx*/
-       const u32 kvm_cpuid_7_0_edx_x86_features =
--              KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
-+              KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) |
-+              KF(SPEC_CTRL);
-+
-+      /* cpuid 0x80000008.0.ebx */
-+      const u32 kvm_cpuid_80000008_0_ebx_x86_features =
-+              F(IBPB);
- 
-       /* all calls to cpuid_count() should be made on the same cpu */
-       get_cpu();
-@@ -622,7 +628,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
-               if (!g_phys_as)
-                       g_phys_as = phys_as;
-               entry->eax = g_phys_as | (virt_as << 8);
--              entry->ebx = entry->edx = 0;
-+              entry->ebx &= kvm_cpuid_80000008_0_ebx_x86_features;
-+              cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
-+              entry->edx = 0;
-               break;
-       }
-       case 0x80000019:
--- 
-2.14.2
-
diff --git a/patches/kernel/0296-x86-svm-Set-IBPB-when-running-a-different-VCPU.patch b/patches/kernel/0296-x86-svm-Set-IBPB-when-running-a-different-VCPU.patch

new file mode 100644 (file)

index 0000000..9b2262c
--- /dev/null
+++ b/patches/kernel/0296-x86-svm-Set-IBPB-when-running-a-different-VCPU.patch
@@ -0,0 +1,73 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Wed, 20 Dec 2017 10:55:47 +0000
+Subject: [PATCH] x86/svm: Set IBPB when running a different VCPU
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Set IBPB (Indirect Branch Prediction Barrier) when the current CPU is
+going to run a VCPU different from what was previously run.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 0ba3eaabbb6666ebd344ee80534e58c375a00810)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/svm.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index a1b19e810c49..fade4869856a 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -518,6 +518,8 @@ struct svm_cpu_data {
+       struct kvm_ldttss_desc *tss_desc;
+ 
+       struct page *save_area;
++
++      struct vmcb *current_vmcb;
+ };
+ 
+ static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
+@@ -1685,11 +1687,19 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
+       __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
+       kvm_vcpu_uninit(vcpu);
+       kmem_cache_free(kvm_vcpu_cache, svm);
++
++      /*
++       * The VMCB could be recycled, causing a false negative in svm_vcpu_load;
++       * block speculative execution.
++       */
++      if (ibpb_inuse)
++              wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+ }
+ 
+ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
++      struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+       int i;
+ 
+       if (unlikely(cpu != vcpu->cpu)) {
+@@ -1718,6 +1728,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+       if (static_cpu_has(X86_FEATURE_RDTSCP))
+               wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
+ 
++      if (sd->current_vmcb != svm->vmcb) {
++              sd->current_vmcb = svm->vmcb;
++              if (ibpb_inuse)
++                      wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
++      }
++
+       avic_vcpu_load(vcpu, cpu);
+ }
+ 
+-- 
+2.14.2
+
diff --git a/patches/kernel/0297-KVM-x86-Add-speculative-control-CPUID-support-for-gu.patch b/patches/kernel/0297-KVM-x86-Add-speculative-control-CPUID-support-for-gu.patch

new file mode 100644 (file)

index 0000000..8537b7c
--- /dev/null
+++ b/patches/kernel/0297-KVM-x86-Add-speculative-control-CPUID-support-for-gu.patch
@@ -0,0 +1,63 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Wed, 20 Dec 2017 10:55:47 +0000
+Subject: [PATCH] KVM: x86: Add speculative control CPUID support for guests
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Provide the guest with the speculative control CPUID related values.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit db7641e5f41cd517c4181ce90c4f9ecc93af4b2b)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/cpuid.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
+index 19adbb418443..f64502d21a89 100644
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void)
+ /* These are scattered features in cpufeatures.h. */
+ #define KVM_CPUID_BIT_AVX512_4VNNIW     2
+ #define KVM_CPUID_BIT_AVX512_4FMAPS     3
++#define KVM_CPUID_BIT_SPEC_CTRL               26
+ #define KF(x) bit(KVM_CPUID_BIT_##x)
+ 
+ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
+@@ -387,7 +388,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ 
+       /* cpuid 7.0.edx*/
+       const u32 kvm_cpuid_7_0_edx_x86_features =
+-              KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
++              KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) |
++              KF(SPEC_CTRL);
++
++      /* cpuid 0x80000008.0.ebx */
++      const u32 kvm_cpuid_80000008_0_ebx_x86_features =
++              F(IBPB);
+ 
+       /* all calls to cpuid_count() should be made on the same cpu */
+       get_cpu();
+@@ -622,7 +628,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+               if (!g_phys_as)
+                       g_phys_as = phys_as;
+               entry->eax = g_phys_as | (virt_as << 8);
+-              entry->ebx = entry->edx = 0;
++              entry->ebx &= kvm_cpuid_80000008_0_ebx_x86_features;
++              cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
++              entry->edx = 0;
+               break;
+       }
+       case 0x80000019:
+-- 
+2.14.2
+
diff --git a/patches/kernel/0297-x86-svm-Add-code-to-clobber-the-RSB-on-VM-exit.patch b/patches/kernel/0297-x86-svm-Add-code-to-clobber-the-RSB-on-VM-exit.patch

deleted file mode 100644 (file)

index d7b82f4..0000000
--- a/patches/kernel/0297-x86-svm-Add-code-to-clobber-the-RSB-on-VM-exit.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tom Lendacky <thomas.lendacky@amd.com>
-Date: Wed, 20 Dec 2017 10:55:47 +0000
-Subject: [PATCH] x86/svm: Add code to clobber the RSB on VM exit
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Add code to overwrite the local CPU RSB entries from the previous less
-privileged mode.
-
-Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 9392e24469b71ff665cdbc3d81db215f9383219d)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kvm/svm.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
-index fade4869856a..e99bdfcc6b01 100644
---- a/arch/x86/kvm/svm.c
-+++ b/arch/x86/kvm/svm.c
-@@ -5008,6 +5008,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
-                       wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
-       }
- 
-+      stuff_RSB();
-+
- #ifdef CONFIG_X86_64
-       wrmsrl(MSR_GS_BASE, svm->host.gs_base);
- #else
--- 
-2.14.2
-
diff --git a/patches/kernel/0298-x86-cpu-AMD-Remove-now-unused-definition-of-MFENCE_R.patch b/patches/kernel/0298-x86-cpu-AMD-Remove-now-unused-definition-of-MFENCE_R.patch

deleted file mode 100644 (file)

index 839cd53..0000000
--- a/patches/kernel/0298-x86-cpu-AMD-Remove-now-unused-definition-of-MFENCE_R.patch
+++ /dev/null
@@ -1,71 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tom Lendacky <thomas.lendacky@amd.com>
-Date: Wed, 20 Dec 2017 10:55:48 +0000
-Subject: [PATCH] x86/cpu/AMD: Remove now unused definition of MFENCE_RDTSC
- feature
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-With the switch to using LFENCE_RDTSC on AMD platforms there is no longer
-a need for the MFENCE_RDTSC feature.  Remove it usage and definition.
-
-Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 6e6c998937329e9d13d4b239233cd058e8a7730f)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/barrier.h | 3 +--
- arch/x86/include/asm/msr.h     | 3 +--
- arch/x86/net/bpf_jit_comp.c    | 3 ---
- 3 files changed, 2 insertions(+), 7 deletions(-)
-
-diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
-index aae78054cae2..d00432579444 100644
---- a/arch/x86/include/asm/barrier.h
-+++ b/arch/x86/include/asm/barrier.h
-@@ -23,8 +23,7 @@
- #define wmb() asm volatile("sfence" ::: "memory")
- #endif
- 
--#define gmb() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
--                                     "lfence", X86_FEATURE_LFENCE_RDTSC);
-+#define gmb() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC);
- 
- #ifdef CONFIG_X86_PPRO_FENCE
- #define dma_rmb()     rmb()
-diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
-index 898dba2e2e2c..3139098269f6 100644
---- a/arch/x86/include/asm/msr.h
-+++ b/arch/x86/include/asm/msr.h
-@@ -213,8 +213,7 @@ static __always_inline unsigned long long rdtsc_ordered(void)
-        * that some other imaginary CPU is updating continuously with a
-        * time stamp.
-        */
--      alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
--                        "lfence", X86_FEATURE_LFENCE_RDTSC);
-+      alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC);
-       return rdtsc();
- }
- 
-diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
-index 879dbfefb66d..e20e304320f9 100644
---- a/arch/x86/net/bpf_jit_comp.c
-+++ b/arch/x86/net/bpf_jit_comp.c
-@@ -116,9 +116,6 @@ static void emit_memory_barrier(u8 **pprog)
-               if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
-                       /* x86 LFENCE opcode 0F AE E8 */
-                       EMIT3(0x0f, 0xae, 0xe8);
--              else if (boot_cpu_has(X86_FEATURE_MFENCE_RDTSC))
--                      /* AMD MFENCE opcode 0F AE F0 */
--                      EMIT3(0x0f, 0xae, 0xf0);
-               else
-                       /* we should never end up here,
-                        * but if we do, better not to emit anything*/
--- 
-2.14.2
-
diff --git a/patches/kernel/0298-x86-svm-Add-code-to-clobber-the-RSB-on-VM-exit.patch b/patches/kernel/0298-x86-svm-Add-code-to-clobber-the-RSB-on-VM-exit.patch

new file mode 100644 (file)

index 0000000..d7b82f4
--- /dev/null
+++ b/patches/kernel/0298-x86-svm-Add-code-to-clobber-the-RSB-on-VM-exit.patch
@@ -0,0 +1,39 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Wed, 20 Dec 2017 10:55:47 +0000
+Subject: [PATCH] x86/svm: Add code to clobber the RSB on VM exit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Add code to overwrite the local CPU RSB entries from the previous less
+privileged mode.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 9392e24469b71ff665cdbc3d81db215f9383219d)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/svm.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index fade4869856a..e99bdfcc6b01 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -5008,6 +5008,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+                       wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
+       }
+ 
++      stuff_RSB();
++
+ #ifdef CONFIG_X86_64
+       wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+ #else
+-- 
+2.14.2
+
diff --git a/patches/kernel/0299-UBUNTU-SAUCE-x86-kvm-Fix-stuff_RSB-for-32-bit.patch b/patches/kernel/0299-UBUNTU-SAUCE-x86-kvm-Fix-stuff_RSB-for-32-bit.patch

deleted file mode 100644 (file)

index 6a04663..0000000
--- a/patches/kernel/0299-UBUNTU-SAUCE-x86-kvm-Fix-stuff_RSB-for-32-bit.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: William Grant <wgrant@ubuntu.com>
-Date: Thu, 11 Jan 2018 17:05:42 -0600
-Subject: [PATCH] UBUNTU: SAUCE: x86/kvm: Fix stuff_RSB() for 32-bit
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5753
-CVE-2017-5715
-
-Signed-off-by: William Grant <wgrant@ubuntu.com>
-Acked-by: Kamal Mostafa <kamal@canonical.com>
-Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
-(cherry picked from commit 306dada4f850bf537dbd8ff06cf1522074b3f327)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/include/asm/kvm_host.h | 10 +++++++---
- 1 file changed, 7 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
-index 4117a97228a2..f39bc68efa56 100644
---- a/arch/x86/include/asm/kvm_host.h
-+++ b/arch/x86/include/asm/kvm_host.h
-@@ -223,9 +223,13 @@ static inline void stuff_RSB(void)
- .label31:     \n\
-       call .label32   \n\
-       pause   \n\
--.label32: \n\
--      add $(32*8), %%rsp      \n\
--": : :"memory");
-+.label32: \n"
-+#ifdef CONFIG_X86_64
-+"     add $(32*8), %%rsp      \n"
-+#else
-+"     add $(32*4), %%esp      \n"
-+#endif
-+: : :"memory");
- }
- 
- enum kvm_reg {
--- 
-2.14.2
-
diff --git a/patches/kernel/0299-x86-cpu-AMD-Remove-now-unused-definition-of-MFENCE_R.patch b/patches/kernel/0299-x86-cpu-AMD-Remove-now-unused-definition-of-MFENCE_R.patch

new file mode 100644 (file)

index 0000000..839cd53
--- /dev/null
+++ b/patches/kernel/0299-x86-cpu-AMD-Remove-now-unused-definition-of-MFENCE_R.patch
@@ -0,0 +1,71 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Wed, 20 Dec 2017 10:55:48 +0000
+Subject: [PATCH] x86/cpu/AMD: Remove now unused definition of MFENCE_RDTSC
+ feature
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+With the switch to using LFENCE_RDTSC on AMD platforms there is no longer
+a need for the MFENCE_RDTSC feature.  Remove it usage and definition.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 6e6c998937329e9d13d4b239233cd058e8a7730f)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/barrier.h | 3 +--
+ arch/x86/include/asm/msr.h     | 3 +--
+ arch/x86/net/bpf_jit_comp.c    | 3 ---
+ 3 files changed, 2 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
+index aae78054cae2..d00432579444 100644
+--- a/arch/x86/include/asm/barrier.h
++++ b/arch/x86/include/asm/barrier.h
+@@ -23,8 +23,7 @@
+ #define wmb() asm volatile("sfence" ::: "memory")
+ #endif
+ 
+-#define gmb() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
+-                                     "lfence", X86_FEATURE_LFENCE_RDTSC);
++#define gmb() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC);
+ 
+ #ifdef CONFIG_X86_PPRO_FENCE
+ #define dma_rmb()     rmb()
+diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
+index 898dba2e2e2c..3139098269f6 100644
+--- a/arch/x86/include/asm/msr.h
++++ b/arch/x86/include/asm/msr.h
+@@ -213,8 +213,7 @@ static __always_inline unsigned long long rdtsc_ordered(void)
+        * that some other imaginary CPU is updating continuously with a
+        * time stamp.
+        */
+-      alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+-                        "lfence", X86_FEATURE_LFENCE_RDTSC);
++      alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC);
+       return rdtsc();
+ }
+ 
+diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
+index 879dbfefb66d..e20e304320f9 100644
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -116,9 +116,6 @@ static void emit_memory_barrier(u8 **pprog)
+               if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
+                       /* x86 LFENCE opcode 0F AE E8 */
+                       EMIT3(0x0f, 0xae, 0xe8);
+-              else if (boot_cpu_has(X86_FEATURE_MFENCE_RDTSC))
+-                      /* AMD MFENCE opcode 0F AE F0 */
+-                      EMIT3(0x0f, 0xae, 0xf0);
+               else
+                       /* we should never end up here,
+                        * but if we do, better not to emit anything*/
+-- 
+2.14.2
+
diff --git a/patches/kernel/0300-UBUNTU-SAUCE-x86-kvm-Fix-stuff_RSB-for-32-bit.patch b/patches/kernel/0300-UBUNTU-SAUCE-x86-kvm-Fix-stuff_RSB-for-32-bit.patch

new file mode 100644 (file)

index 0000000..6a04663
--- /dev/null
+++ b/patches/kernel/0300-UBUNTU-SAUCE-x86-kvm-Fix-stuff_RSB-for-32-bit.patch
@@ -0,0 +1,44 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: William Grant <wgrant@ubuntu.com>
+Date: Thu, 11 Jan 2018 17:05:42 -0600
+Subject: [PATCH] UBUNTU: SAUCE: x86/kvm: Fix stuff_RSB() for 32-bit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5753
+CVE-2017-5715
+
+Signed-off-by: William Grant <wgrant@ubuntu.com>
+Acked-by: Kamal Mostafa <kamal@canonical.com>
+Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
+(cherry picked from commit 306dada4f850bf537dbd8ff06cf1522074b3f327)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/kvm_host.h | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 4117a97228a2..f39bc68efa56 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -223,9 +223,13 @@ static inline void stuff_RSB(void)
+ .label31:     \n\
+       call .label32   \n\
+       pause   \n\
+-.label32: \n\
+-      add $(32*8), %%rsp      \n\
+-": : :"memory");
++.label32: \n"
++#ifdef CONFIG_X86_64
++"     add $(32*8), %%rsp      \n"
++#else
++"     add $(32*4), %%esp      \n"
++#endif
++: : :"memory");
+ }
+ 
+ enum kvm_reg {
+-- 
+2.14.2
+
diff --git a/patches/kernel/0300-x86-pti-Enable-PTI-by-default.patch b/patches/kernel/0300-x86-pti-Enable-PTI-by-default.patch

deleted file mode 100644 (file)

index d720d28..0000000
--- a/patches/kernel/0300-x86-pti-Enable-PTI-by-default.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 3 Jan 2018 15:18:44 +0100
-Subject: [PATCH] x86/pti: Enable PTI by default
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CVE-2017-5754
-
-This really want's to be enabled by default. Users who know what they are
-doing can disable it either in the config or on the kernel command line.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: stable@vger.kernel.org
-(cherry picked from commit 87faa0d9b43b4755ff6963a22d1fd1bee1aa3b39)
-Signed-off-by: Andy Whitcroft <apw@canonical.com>
-Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
-(cherry picked from commit 436cdbfed2112bea7943f4a0f6dfabf54088c8c6)
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- security/Kconfig | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/security/Kconfig b/security/Kconfig
-index 91cb8f611a0d..529dccc22ce5 100644
---- a/security/Kconfig
-+++ b/security/Kconfig
-@@ -98,6 +98,7 @@ config SECURITY_NETWORK
- 
- config PAGE_TABLE_ISOLATION
-       bool "Remove the kernel mapping in user mode"
-+      default y
-       depends on X86_64 && !UML
-       help
-         This feature reduces the number of hardware side channels by
--- 
-2.14.2
-
diff --git a/patches/kernel/0301-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch b/patches/kernel/0301-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch

deleted file mode 100644 (file)

index e8b4be8..0000000
--- a/patches/kernel/0301-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch
+++ /dev/null
@@ -1,49 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Andrew Honig <ahonig@google.com>
-Date: Wed, 10 Jan 2018 10:12:03 -0800
-Subject: [PATCH] KVM: x86: Add memory barrier on vmcs field lookup
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-commit 75f139aaf896d6fdeec2e468ddfa4b2fe469bf40 upstream.
-
-This adds a memory barrier when performing a lookup into
-the vmcs_field_to_offset_table.  This is related to
-CVE-2017-5753.
-
-Signed-off-by: Andrew Honig <ahonig@google.com>
-Reviewed-by: Jim Mattson <jmattson@google.com>
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kvm/vmx.c | 12 ++++++++++--
- 1 file changed, 10 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
-index d2168203bddc..e6fa3df81fd8 100644
---- a/arch/x86/kvm/vmx.c
-+++ b/arch/x86/kvm/vmx.c
-@@ -882,8 +882,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
- {
-       BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
- 
--      if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
--          vmcs_field_to_offset_table[field] == 0)
-+      if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
-+              return -ENOENT;
-+
-+      /*
-+       * FIXME: Mitigation for CVE-2017-5753.  To be replaced with a
-+       * generic mechanism.
-+       */
-+      asm("lfence");
-+
-+      if (vmcs_field_to_offset_table[field] == 0)
-               return -ENOENT;
- 
-       return vmcs_field_to_offset_table[field];
--- 
-2.14.2
-
diff --git a/patches/kernel/0301-x86-pti-Enable-PTI-by-default.patch b/patches/kernel/0301-x86-pti-Enable-PTI-by-default.patch

new file mode 100644 (file)

index 0000000..d720d28
--- /dev/null
+++ b/patches/kernel/0301-x86-pti-Enable-PTI-by-default.patch
@@ -0,0 +1,39 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 3 Jan 2018 15:18:44 +0100
+Subject: [PATCH] x86/pti: Enable PTI by default
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CVE-2017-5754
+
+This really want's to be enabled by default. Users who know what they are
+doing can disable it either in the config or on the kernel command line.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+(cherry picked from commit 87faa0d9b43b4755ff6963a22d1fd1bee1aa3b39)
+Signed-off-by: Andy Whitcroft <apw@canonical.com>
+Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
+(cherry picked from commit 436cdbfed2112bea7943f4a0f6dfabf54088c8c6)
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ security/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/security/Kconfig b/security/Kconfig
+index 91cb8f611a0d..529dccc22ce5 100644
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -98,6 +98,7 @@ config SECURITY_NETWORK
+ 
+ config PAGE_TABLE_ISOLATION
+       bool "Remove the kernel mapping in user mode"
++      default y
+       depends on X86_64 && !UML
+       help
+         This feature reduces the number of hardware side channels by
+-- 
+2.14.2
+
diff --git a/patches/kernel/0302-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch b/patches/kernel/0302-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch

new file mode 100644 (file)

index 0000000..e8b4be8
--- /dev/null
+++ b/patches/kernel/0302-KVM-x86-Add-memory-barrier-on-vmcs-field-lookup.patch
@@ -0,0 +1,49 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Andrew Honig <ahonig@google.com>
+Date: Wed, 10 Jan 2018 10:12:03 -0800
+Subject: [PATCH] KVM: x86: Add memory barrier on vmcs field lookup
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 75f139aaf896d6fdeec2e468ddfa4b2fe469bf40 upstream.
+
+This adds a memory barrier when performing a lookup into
+the vmcs_field_to_offset_table.  This is related to
+CVE-2017-5753.
+
+Signed-off-by: Andrew Honig <ahonig@google.com>
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/vmx.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index d2168203bddc..e6fa3df81fd8 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -882,8 +882,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
+ {
+       BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
+ 
+-      if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
+-          vmcs_field_to_offset_table[field] == 0)
++      if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
++              return -ENOENT;
++
++      /*
++       * FIXME: Mitigation for CVE-2017-5753.  To be replaced with a
++       * generic mechanism.
++       */
++      asm("lfence");
++
++      if (vmcs_field_to_offset_table[field] == 0)
+               return -ENOENT;
+ 
+       return vmcs_field_to_offset_table[field];
+-- 
+2.14.2
+
diff --git a/patches/kernel/0302-x86-tboot-Unbreak-tboot-with-PTI-enabled.patch b/patches/kernel/0302-x86-tboot-Unbreak-tboot-with-PTI-enabled.patch

deleted file mode 100644 (file)

index a65f18b..0000000
--- a/patches/kernel/0302-x86-tboot-Unbreak-tboot-with-PTI-enabled.patch
+++ /dev/null
@@ -1,54 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Dave Hansen <dave.hansen@linux.intel.com>
-Date: Sat, 6 Jan 2018 18:41:14 +0100
-Subject: [PATCH] x86/tboot: Unbreak tboot with PTI enabled
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-commit 262b6b30087246abf09d6275eb0c0dc421bcbe38 upstream.
-
-This is another case similar to what EFI does: create a new set of
-page tables, map some code at a low address, and jump to it.  PTI
-mistakes this low address for userspace and mistakenly marks it
-non-executable in an effort to make it unusable for userspace.
-
-Undo the poison to allow execution.
-
-Fixes: 385ce0ea4c07 ("x86/mm/pti: Add Kconfig")
-Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
-Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
-Cc: Tim Chen <tim.c.chen@linux.intel.com>
-Cc: Jon Masters <jcm@redhat.com>
-Cc: Dave Hansen <dave.hansen@intel.com>
-Cc: Andi Kleen <andi@firstfloor.org>
-Cc: Jeff Law <law@redhat.com>
-Cc: Paolo Bonzini <pbonzini@redhat.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
-Cc: David" <dwmw@amazon.co.uk>
-Cc: Nick Clifton <nickc@redhat.com>
-Link: https://lkml.kernel.org/r/20180108102805.GK25546@redhat.com
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/kernel/tboot.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
-index a2486f444073..8337730f0956 100644
---- a/arch/x86/kernel/tboot.c
-+++ b/arch/x86/kernel/tboot.c
-@@ -127,6 +127,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
-       p4d = p4d_alloc(&tboot_mm, pgd, vaddr);
-       if (!p4d)
-               return -1;
-+      pgd->pgd &= ~_PAGE_NX;
-       pud = pud_alloc(&tboot_mm, p4d, vaddr);
-       if (!pud)
-               return -1;
--- 
-2.14.2
-
diff --git a/patches/kernel/0303-x86-perf-Disable-intel_bts-when-PTI.patch b/patches/kernel/0303-x86-perf-Disable-intel_bts-when-PTI.patch

deleted file mode 100644 (file)

index 039498e..0000000
--- a/patches/kernel/0303-x86-perf-Disable-intel_bts-when-PTI.patch
+++ /dev/null
@@ -1,72 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Sun, 14 Jan 2018 11:27:13 +0100
-Subject: [PATCH] x86,perf: Disable intel_bts when PTI
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-commit 99a9dc98ba52267ce5e062b52de88ea1f1b2a7d8 upstream.
-
-The intel_bts driver does not use the 'normal' BTS buffer which is exposed
-through the cpu_entry_area but instead uses the memory allocated for the
-perf AUX buffer.
-
-This obviously comes apart when using PTI because then the kernel mapping;
-which includes that AUX buffer memory; disappears. Fixing this requires to
-expose a mapping which is visible in all context and that's not trivial.
-
-As a quick fix disable this driver when PTI is enabled to prevent
-malfunction.
-
-Fixes: 385ce0ea4c07 ("x86/mm/pti: Add Kconfig")
-Reported-by: Vince Weaver <vincent.weaver@maine.edu>
-Reported-by: Robert Święcki <robert@swiecki.net>
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
-Cc: greg@kroah.com
-Cc: hughd@google.com
-Cc: luto@amacapital.net
-Cc: Vince Weaver <vince@deater.net>
-Cc: torvalds@linux-foundation.org
-Cc: stable@vger.kernel.org
-Link: https://lkml.kernel.org/r/20180114102713.GB6166@worktop.programming.kicks-ass.net
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
----
- arch/x86/events/intel/bts.c | 18 ++++++++++++++++++
- 1 file changed, 18 insertions(+)
-
-diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
-index ddd8d3516bfc..9a62e6fce0e0 100644
---- a/arch/x86/events/intel/bts.c
-+++ b/arch/x86/events/intel/bts.c
-@@ -582,6 +582,24 @@ static __init int bts_init(void)
-       if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
-               return -ENODEV;
- 
-+      if (boot_cpu_has(X86_FEATURE_PTI)) {
-+              /*
-+               * BTS hardware writes through a virtual memory map we must
-+               * either use the kernel physical map, or the user mapping of
-+               * the AUX buffer.
-+               *
-+               * However, since this driver supports per-CPU and per-task inherit
-+               * we cannot use the user mapping since it will not be availble
-+               * if we're not running the owning process.
-+               *
-+               * With PTI we can't use the kernal map either, because its not
-+               * there when we run userspace.
-+               *
-+               * For now, disable this driver when using PTI.
-+               */
-+              return -ENODEV;
-+      }
-+
-       bts_pmu.capabilities    = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
-                                 PERF_PMU_CAP_EXCLUSIVE;
-       bts_pmu.task_ctx_nr     = perf_sw_context;
--- 
-2.14.2
-
diff --git a/patches/kernel/0303-x86-tboot-Unbreak-tboot-with-PTI-enabled.patch b/patches/kernel/0303-x86-tboot-Unbreak-tboot-with-PTI-enabled.patch

new file mode 100644 (file)

index 0000000..a65f18b
--- /dev/null
+++ b/patches/kernel/0303-x86-tboot-Unbreak-tboot-with-PTI-enabled.patch
@@ -0,0 +1,54 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Sat, 6 Jan 2018 18:41:14 +0100
+Subject: [PATCH] x86/tboot: Unbreak tboot with PTI enabled
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 262b6b30087246abf09d6275eb0c0dc421bcbe38 upstream.
+
+This is another case similar to what EFI does: create a new set of
+page tables, map some code at a low address, and jump to it.  PTI
+mistakes this low address for userspace and mistakenly marks it
+non-executable in an effort to make it unusable for userspace.
+
+Undo the poison to allow execution.
+
+Fixes: 385ce0ea4c07 ("x86/mm/pti: Add Kconfig")
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Jon Masters <jcm@redhat.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Andi Kleen <andi@firstfloor.org>
+Cc: Jeff Law <law@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: David" <dwmw@amazon.co.uk>
+Cc: Nick Clifton <nickc@redhat.com>
+Link: https://lkml.kernel.org/r/20180108102805.GK25546@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kernel/tboot.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
+index a2486f444073..8337730f0956 100644
+--- a/arch/x86/kernel/tboot.c
++++ b/arch/x86/kernel/tboot.c
+@@ -127,6 +127,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
+       p4d = p4d_alloc(&tboot_mm, pgd, vaddr);
+       if (!p4d)
+               return -1;
++      pgd->pgd &= ~_PAGE_NX;
+       pud = pud_alloc(&tboot_mm, p4d, vaddr);
+       if (!pud)
+               return -1;
+-- 
+2.14.2
+
diff --git a/patches/kernel/0304-x86-perf-Disable-intel_bts-when-PTI.patch b/patches/kernel/0304-x86-perf-Disable-intel_bts-when-PTI.patch

new file mode 100644 (file)

index 0000000..039498e
--- /dev/null
+++ b/patches/kernel/0304-x86-perf-Disable-intel_bts-when-PTI.patch
@@ -0,0 +1,72 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sun, 14 Jan 2018 11:27:13 +0100
+Subject: [PATCH] x86,perf: Disable intel_bts when PTI
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 99a9dc98ba52267ce5e062b52de88ea1f1b2a7d8 upstream.
+
+The intel_bts driver does not use the 'normal' BTS buffer which is exposed
+through the cpu_entry_area but instead uses the memory allocated for the
+perf AUX buffer.
+
+This obviously comes apart when using PTI because then the kernel mapping;
+which includes that AUX buffer memory; disappears. Fixing this requires to
+expose a mapping which is visible in all context and that's not trivial.
+
+As a quick fix disable this driver when PTI is enabled to prevent
+malfunction.
+
+Fixes: 385ce0ea4c07 ("x86/mm/pti: Add Kconfig")
+Reported-by: Vince Weaver <vincent.weaver@maine.edu>
+Reported-by: Robert Święcki <robert@swiecki.net>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: greg@kroah.com
+Cc: hughd@google.com
+Cc: luto@amacapital.net
+Cc: Vince Weaver <vince@deater.net>
+Cc: torvalds@linux-foundation.org
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20180114102713.GB6166@worktop.programming.kicks-ass.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/events/intel/bts.c | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
+index ddd8d3516bfc..9a62e6fce0e0 100644
+--- a/arch/x86/events/intel/bts.c
++++ b/arch/x86/events/intel/bts.c
+@@ -582,6 +582,24 @@ static __init int bts_init(void)
+       if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
+               return -ENODEV;
+ 
++      if (boot_cpu_has(X86_FEATURE_PTI)) {
++              /*
++               * BTS hardware writes through a virtual memory map we must
++               * either use the kernel physical map, or the user mapping of
++               * the AUX buffer.
++               *
++               * However, since this driver supports per-CPU and per-task inherit
++               * we cannot use the user mapping since it will not be availble
++               * if we're not running the owning process.
++               *
++               * With PTI we can't use the kernal map either, because its not
++               * there when we run userspace.
++               *
++               * For now, disable this driver when using PTI.
++               */
++              return -ENODEV;
++      }
++
+       bts_pmu.capabilities    = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
+                                 PERF_PMU_CAP_EXCLUSIVE;
+       bts_pmu.task_ctx_nr     = perf_sw_context;
+-- 
+2.14.2
+
author	Fabian Grünbichler <f.gruenbichler@proxmox.com>
	Fri, 19 Jan 2018 11:43:16 +0000 (12:43 +0100)
committer	Fabian Grünbichler <f.gruenbichler@proxmox.com>
	Fri, 19 Jan 2018 11:43:16 +0000 (12:43 +0100)