]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blobdiff - arch/x86/kernel/cpu/common.c
x86/entry/64: Make cpu_entry_area.tss read-only
[mirror_ubuntu-artful-kernel.git] / arch / x86 / kernel / cpu / common.c
index c2b2ee73b8a15daf531d7421bcf4ab7a3b7e336f..7992e5a8076ccdb40c6009eed9ccd1c2e68cc807 100644 (file)
@@ -466,45 +466,66 @@ void load_percpu_segment(int cpu)
        load_stack_canary_segment();
 }
 
-static void set_percpu_fixmap_pages(int fixmap_index, void *ptr,
-                                   int pages, pgprot_t prot)
-{
-       int i;
-
-       for (i = 0; i < pages; i++) {
-               __set_fixmap(fixmap_index - i,
-                            per_cpu_ptr_to_phys(ptr + i * PAGE_SIZE), prot);
-       }
-}
-
 #ifdef CONFIG_X86_32
 /* The 32-bit entry code needs to find cpu_entry_area. */
 DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 #endif
 
+#ifdef CONFIG_X86_64
+/*
+ * Special IST stacks which the CPU switches to when it calls
+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
+ * limit), all of them are 4K, except the debug stack which
+ * is 8K.
+ */
+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+         [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
+         [DEBUG_STACK - 1]                     = DEBUG_STKSZ
+};
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+#endif
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
+                                  SYSENTER_stack_storage);
+
+static void __init
+set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
+{
+       for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
+               __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
+}
+
 /* Setup the fixmap mappings only once per-processor */
-static inline void setup_cpu_entry_area(int cpu)
+static void __init setup_cpu_entry_area(int cpu)
 {
 #ifdef CONFIG_X86_64
        extern char _entry_trampoline[];
 
-       /* On 64-bit systems, we use a read-only fixmap GDT. */
+       /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
        pgprot_t gdt_prot = PAGE_KERNEL_RO;
+       pgprot_t tss_prot = PAGE_KERNEL_RO;
 #else
        /*
         * On native 32-bit systems, the GDT cannot be read-only because
         * our double fault handler uses a task gate, and entering through
-        * a task gate needs to change an available TSS to busy.  If the GDT
-        * is read-only, that will triple fault.
+        * a task gate needs to change an available TSS to busy.  If the
+        * GDT is read-only, that will triple fault.  The TSS cannot be
+        * read-only because the CPU writes to it on task switches.
         *
-        * On Xen PV, the GDT must be read-only because the hypervisor requires
-        * it.
+        * On Xen PV, the GDT must be read-only because the hypervisor
+        * requires it.
         */
        pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
                PAGE_KERNEL_RO : PAGE_KERNEL;
+       pgprot_t tss_prot = PAGE_KERNEL;
 #endif
 
        __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+       set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
+                               per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
+                               PAGE_KERNEL);
 
        /*
         * The Intel SDM says (Volume 3, 7.2.1):
@@ -527,20 +548,36 @@ static inline void setup_cpu_entry_area(int cpu)
                      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
        BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
        set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
-                               &per_cpu(cpu_tss, cpu),
+                               &per_cpu(cpu_tss_rw, cpu),
                                sizeof(struct tss_struct) / PAGE_SIZE,
-                               PAGE_KERNEL);
+                               tss_prot);
 
 #ifdef CONFIG_X86_32
-       this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu));
+       per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
 #endif
 
 #ifdef CONFIG_X86_64
+       BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+       BUILD_BUG_ON(sizeof(exception_stacks) !=
+                    sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+       set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
+                               &per_cpu(exception_stacks, cpu),
+                               sizeof(exception_stacks) / PAGE_SIZE,
+                               PAGE_KERNEL);
+
        __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
                     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
 #endif
 }
 
+void __init setup_cpu_entry_areas(void)
+{
+       unsigned int cpu;
+
+       for_each_possible_cpu(cpu)
+               setup_cpu_entry_area(cpu);
+}
+
 /* Load the original GDT from the per-cpu structure */
 void load_direct_gdt(int cpu)
 {
@@ -1269,7 +1306,7 @@ void enable_sep_cpu(void)
                return;
 
        cpu = get_cpu();
-       tss = &per_cpu(cpu_tss, cpu);
+       tss = &per_cpu(cpu_tss_rw, cpu);
 
        /*
         * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1278,12 +1315,7 @@ void enable_sep_cpu(void)
 
        tss->x86_tss.ss1 = __KERNEL_CS;
        wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
-
-       wrmsr(MSR_IA32_SYSENTER_ESP,
-             (unsigned long)&get_cpu_entry_area(cpu)->tss +
-             offsetofend(struct tss_struct, SYSENTER_stack),
-             0);
-
+       wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
        wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
 
        put_cpu();
@@ -1386,20 +1418,6 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
-         [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
-         [DEBUG_STACK - 1]                     = DEBUG_STKSZ
-};
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
@@ -1423,9 +1441,7 @@ void syscall_init(void)
         * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
         */
        wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-       wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
-                   (unsigned long)&get_cpu_entry_area(cpu)->tss +
-                   offsetofend(struct tss_struct, SYSENTER_stack));
+       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
        wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
 #else
        wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1569,7 +1585,7 @@ void cpu_init(void)
        if (cpu)
                load_ucode_ap();
 
-       t = &per_cpu(cpu_tss, cpu);
+       t = &per_cpu(cpu_tss_rw, cpu);
        oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
@@ -1608,7 +1624,7 @@ void cpu_init(void)
         * set up and load the per-CPU TSS
         */
        if (!oist->ist[0]) {
-               char *estacks = per_cpu(exception_stacks, cpu);
+               char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
 
                for (v = 0; v < N_EXCEPTION_STACKS; v++) {
                        estacks += exception_stack_sizes[v];
@@ -1633,16 +1649,13 @@ void cpu_init(void)
        BUG_ON(me->mm);
        enter_lazy_tlb(&init_mm, me);
 
-       setup_cpu_entry_area(cpu);
-
        /*
         * Initialize the TSS.  sp0 points to the entry trampoline stack
         * regardless of what task is running.
         */
        set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
        load_TR_desc();
-       load_sp0((unsigned long)&get_cpu_entry_area(cpu)->tss +
-                offsetofend(struct tss_struct, SYSENTER_stack));
+       load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
 
        load_mm_ldt(&init_mm);
 
@@ -1663,7 +1676,7 @@ void cpu_init(void)
 {
        int cpu = smp_processor_id();
        struct task_struct *curr = current;
-       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
+       struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
 
        wait_for_master_cpu(cpu);
 
@@ -1693,8 +1706,6 @@ void cpu_init(void)
        BUG_ON(curr->mm);
        enter_lazy_tlb(&init_mm, curr);
 
-       setup_cpu_entry_area(cpu);
-
        /*
         * Initialize the TSS.  Don't bother initializing sp0, as the initial
         * task never enters user mode.