ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
... unused hole ...
ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
+ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
The mappings are not part of any other kernel PGD and are only available
during EFI runtime calls.
+The module mapping space size changes based on the CONFIG requirements for the
+following fixmap section.
+
Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
physical memory, vmalloc/ioremap space and virtual memory map are randomized.
Their order is preserved but their base will be offset early at boot time.
This value can be changed after boot using the
/proc/sys/vm/mmap_rnd_compat_bits tunable
+config HAVE_ARCH_COMPAT_MMAP_BASES
+ bool
+ help
+ This allows 64bit applications to invoke 32-bit mmap() syscall
+ and vice-versa 32-bit applications to call 64-bit mmap().
+ Required for applications doing different bitness syscalls.
+
config HAVE_COPY_THREAD_TLS
bool
help
select HAVE_ARCH_KMEMCHECK
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
+ select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
d.p = 1; /* Present */
d.d = 1; /* 32-bit */
- write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+ write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}
static int vgetcpu_online(unsigned int cpu)
#include <asm/desc_defs.h>
#include <asm/ldt.h>
#include <asm/mmu.h>
+#include <asm/fixmap.h>
#include <linux/smp.h>
#include <linux/percpu.h>
extern gate_desc idt_table[];
extern const struct desc_ptr debug_idt_descr;
extern gate_desc debug_idt_table[];
+extern pgprot_t pg_fixmap_gdt_flags;
struct gdt_page {
struct desc_struct gdt[GDT_ENTRIES];
DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+/* Provide the original GDT */
+static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
{
return per_cpu(gdt_page, cpu).gdt;
}
+static inline unsigned long get_cpu_gdt_rw_vaddr(unsigned int cpu)
+{
+ return (unsigned long)get_cpu_gdt_rw(cpu);
+}
+
+/* Provide the current original GDT */
+static inline struct desc_struct *get_current_gdt_rw(void)
+{
+ return this_cpu_ptr(&gdt_page)->gdt;
+}
+
+static inline unsigned long get_current_gdt_rw_vaddr(void)
+{
+ return (unsigned long)get_current_gdt_rw();
+}
+
+/* Get the fixmap index for a specific processor */
+static inline unsigned int get_cpu_gdt_ro_index(int cpu)
+{
+ return FIX_GDT_REMAP_BEGIN + cpu;
+}
+
+/* Provide the fixmap address of the remapped GDT */
+static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
+{
+ unsigned int idx = get_cpu_gdt_ro_index(cpu);
+ return (struct desc_struct *)__fix_to_virt(idx);
+}
+
+static inline unsigned long get_cpu_gdt_ro_vaddr(int cpu)
+{
+ return (unsigned long)get_cpu_gdt_ro(cpu);
+}
+
+/* Provide the current read-only GDT */
+static inline struct desc_struct *get_current_gdt_ro(void)
+{
+ return get_cpu_gdt_ro(smp_processor_id());
+}
+
+static inline unsigned long get_current_gdt_ro_vaddr(void)
+{
+ return (unsigned long)get_current_gdt_ro();
+}
+
#ifdef CONFIG_X86_64
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
{
- struct desc_struct *d = get_cpu_gdt_table(cpu);
+ struct desc_struct *d = get_cpu_gdt_rw(cpu);
tss_desc tss;
set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
entries * LDT_ENTRY_SIZE - 1);
- write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+ write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_LDT,
&ldt, DESC_LDT);
asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
}
}
+static inline void native_load_gdt(const struct desc_ptr *dtr)
+{
+ asm volatile("lgdt %0"::"m" (*dtr));
+}
+
+static inline void native_load_idt(const struct desc_ptr *dtr)
+{
+ asm volatile("lidt %0"::"m" (*dtr));
+}
+
+static inline void native_store_gdt(struct desc_ptr *dtr)
+{
+ asm volatile("sgdt %0":"=m" (*dtr));
+}
+
+static inline void native_store_idt(struct desc_ptr *dtr)
+{
+ asm volatile("sidt %0":"=m" (*dtr));
+}
+
+/*
+ * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
+ * a read-only remapping. To prevent a page fault, the GDT is switched to the
+ * original writeable version when needed.
+ */
+#ifdef CONFIG_X86_64
+static inline void native_load_tr_desc(void)
+{
+ struct desc_ptr gdt;
+ int cpu = raw_smp_processor_id();
+ bool restore = 0;
+ struct desc_struct *fixmap_gdt;
+
+ native_store_gdt(&gdt);
+ fixmap_gdt = get_cpu_gdt_ro(cpu);
+
+ /*
+ * If the current GDT is the read-only fixmap, swap to the original
+ * writeable version. Swap back at the end.
+ */
+ if (gdt.address == (unsigned long)fixmap_gdt) {
+ load_direct_gdt(cpu);
+ restore = 1;
+ }
+ asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+ if (restore)
+ load_fixmap_gdt(cpu);
+}
+#else
static inline void native_load_tr_desc(void)
{
asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
}
+#endif
+
+static inline unsigned long native_store_tr(void)
+{
+ unsigned long tr;
+
+ asm volatile("str %0":"=r" (tr));
+
+ return tr;
+}
+
+static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
+{
+ struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
+ unsigned int i;
+
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+ gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
+}
DECLARE_PER_CPU(bool, __tss_limit_invalid);
static inline void force_reload_TR(void)
{
- struct desc_struct *d = get_cpu_gdt_table(smp_processor_id());
+ struct desc_struct *d = get_current_gdt_rw();
tss_desc tss;
memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));
this_cpu_write(__tss_limit_invalid, true);
}
-static inline void native_load_gdt(const struct desc_ptr *dtr)
-{
- asm volatile("lgdt %0"::"m" (*dtr));
-}
-
-static inline void native_load_idt(const struct desc_ptr *dtr)
-{
- asm volatile("lidt %0"::"m" (*dtr));
-}
-
-static inline void native_store_gdt(struct desc_ptr *dtr)
-{
- asm volatile("sgdt %0":"=m" (*dtr));
-}
-
-static inline void native_store_idt(struct desc_ptr *dtr)
-{
- asm volatile("sidt %0":"=m" (*dtr));
-}
-
-static inline unsigned long native_store_tr(void)
-{
- unsigned long tr;
-
- asm volatile("str %0":"=r" (tr));
-
- return tr;
-}
-
-static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
-{
- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
- unsigned int i;
-
- for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
- gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
-}
-
/* This intentionally ignores lm, since 32-bit apps don't have that field. */
#define LDT_empty(info) \
((info)->base_addr == 0 && \
} \
} while (0)
+/*
+ * True on X86_32 or when emulating IA32 on X86_64
+ */
+static inline int mmap_is_ia32(void)
+{
+ return IS_ENABLED(CONFIG_X86_32) ||
+ (IS_ENABLED(CONFIG_COMPAT) &&
+ test_thread_flag(TIF_ADDR32));
+}
+
+extern unsigned long tasksize_32bit(void);
+extern unsigned long tasksize_64bit(void);
+extern unsigned long get_mmap_base(int is_legacy);
+
#ifdef CONFIG_X86_32
+#define __STACK_RND_MASK(is32bit) (0x7ff)
#define STACK_RND_MASK (0x7ff)
#define ARCH_DLINFO ARCH_DLINFO_IA32
#else /* CONFIG_X86_32 */
/* 1GB for 64bit, 8MB for 32bit */
-#define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)
+#define __STACK_RND_MASK(is32bit) ((is32bit) ? 0x7ff : 0x3fffff)
+#define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32())
#define ARCH_DLINFO \
do { \
int uses_interp);
#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages
-/*
- * True on X86_32 or when emulating IA32 on X86_64
- */
-static inline int mmap_is_ia32(void)
-{
- return IS_ENABLED(CONFIG_X86_32) ||
- (IS_ENABLED(CONFIG_COMPAT) &&
- test_thread_flag(TIF_ADDR32));
-}
-
/* Do not change the values. See get_align_mask() */
enum align_flags {
ALIGN_VA_32 = BIT(0),
#ifdef CONFIG_X86_INTEL_MID
FIX_LNW_VRTC,
#endif
+ /* Fixmap entries to remap the GDTs, one per processor. */
+ FIX_GDT_REMAP_BEGIN,
+ FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
+
__end_of_permanent_fixed_addresses,
/*
typedef unsigned long pteval_t;
typedef unsigned long pmdval_t;
typedef unsigned long pudval_t;
+typedef unsigned long p4dval_t;
typedef unsigned long pgdval_t;
typedef unsigned long pgprotval_t;
typedef u64 pteval_t;
typedef u64 pmdval_t;
typedef u64 pudval_t;
+typedef u64 p4dval_t;
typedef u64 pgdval_t;
typedef u64 pgprotval_t;
return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
}
+static inline unsigned long p4d_pfn(p4d_t p4d)
+{
+ return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT;
+}
+
+static inline int p4d_large(p4d_t p4d)
+{
+ /* No 512 GiB pages yet */
+ return 0;
+}
+
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
static inline int pmd_large(pmd_t pte)
}
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
+static inline unsigned long pud_index(unsigned long address)
+{
+ return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
+}
+
+static inline unsigned long p4d_index(unsigned long address)
+{
+ return (address >> P4D_SHIFT) & (PTRS_PER_P4D - 1);
+}
+
#if CONFIG_PGTABLE_LEVELS > 3
static inline int pgd_present(pgd_t pgd)
{
#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
/* to find an entry in a page-table-directory. */
-static inline unsigned long pud_index(unsigned long address)
-{
- return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
-}
-
static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
{
return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
typedef unsigned long pteval_t;
typedef unsigned long pmdval_t;
typedef unsigned long pudval_t;
+typedef unsigned long p4dval_t;
typedef unsigned long pgdval_t;
typedef unsigned long pgprotval_t;
#endif /* CONFIG_RANDOMIZE_MEMORY */
#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
-#define MODULES_END _AC(0xffffffffff000000, UL)
+/* The module sections ends with the start of the fixmap */
+#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
return native_pgd_val(pgd) & PTE_FLAGS_MASK;
}
-#if CONFIG_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 4
+
+#error FIXME
+
+#else
#include <asm-generic/5level-fixup.h>
+static inline p4dval_t native_p4d_val(p4d_t p4d)
+{
+ return native_pgd_val(p4d);
+}
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 3
typedef struct { pudval_t pud; } pud_t;
static inline pud_t native_make_pud(pmdval_t val)
}
#endif
+static inline p4dval_t p4d_pfn_mask(p4d_t p4d)
+{
+ /* No 512 GiB huge pages yet */
+ return PTE_PFN_MASK;
+}
+
+static inline p4dval_t p4d_flags_mask(p4d_t p4d)
+{
+ return ~p4d_pfn_mask(p4d);
+}
+
+static inline p4dval_t p4d_flags(p4d_t p4d)
+{
+ return native_p4d_val(p4d) & p4d_flags_mask(p4d);
+}
+
static inline pudval_t pud_pfn_mask(pud_t pud)
{
if (native_pud_val(pud) & _PAGE_PSE)
PG_LEVEL_4K,
PG_LEVEL_2M,
PG_LEVEL_1G,
+ PG_LEVEL_512G,
PG_LEVEL_NUM
};
extern void cpu_set_gdt(int);
extern void switch_to_new_gdt(int);
+extern void load_direct_gdt(int);
+extern void load_fixmap_gdt(int);
extern void load_percpu_segment(int);
extern void cpu_init(void);
/*
* User space process size: 3GB (default).
*/
+#define IA32_PAGE_OFFSET PAGE_OFFSET
#define TASK_SIZE PAGE_OFFSET
#define TASK_SIZE_MAX TASK_SIZE
#define STACK_TOP TASK_SIZE
* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
-#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
+#define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3))
+#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE)
#define KSTK_EIP(task) (task_pt_regs(task)->ip)
{
#ifdef CONFIG_X86_32
unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
- struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
+ struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu);
struct desc_struct desc;
desc = gdt_table[GDT_ENTRY_STACK_CANARY];
#ifdef CONFIG_SMP
initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
early_gdt_descr.address =
- (unsigned long)get_cpu_gdt_table(smp_processor_id());
+ (unsigned long)get_cpu_gdt_rw(smp_processor_id());
initial_gs = per_cpu_offset(smp_processor_id());
#endif
initial_code = (unsigned long)wakeup_long64;
cpu = get_cpu();
BUG_ON(cpu != 0);
- gdt = get_cpu_gdt_table(cpu);
+ gdt = get_cpu_gdt_rw(cpu);
save_desc_40 = gdt[0x40 / 8];
gdt[0x40 / 8] = bad_bios_desc;
cpu = get_cpu();
BUG_ON(cpu != 0);
- gdt = get_cpu_gdt_table(cpu);
+ gdt = get_cpu_gdt_rw(cpu);
save_desc_40 = gdt[0x40 / 8];
gdt[0x40 / 8] = bad_bios_desc;
* Note we only set APM segments on CPU zero, since we pin the APM
* code to that CPU.
*/
- gdt = get_cpu_gdt_table(0);
+ gdt = get_cpu_gdt_rw(0);
set_desc_base(&gdt[APM_CS >> 3],
(unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
set_desc_base(&gdt[APM_CS_16 >> 3],
}
/*
- * Current gdt points %fs at the "master" per-cpu area: after this,
- * it's on the real one.
+ * On 64-bit the GDT remapping is read-only.
+ * A global is used for Xen to change the default when required.
*/
-void switch_to_new_gdt(int cpu)
+#ifdef CONFIG_X86_64
+pgprot_t pg_fixmap_gdt_flags = PAGE_KERNEL_RO;
+#else
+pgprot_t pg_fixmap_gdt_flags = PAGE_KERNEL;
+#endif
+
+/* Setup the fixmap mapping only once per-processor */
+static inline void setup_fixmap_gdt(int cpu)
+{
+ __set_fixmap(get_cpu_gdt_ro_index(cpu),
+ __pa(get_cpu_gdt_rw(cpu)), pg_fixmap_gdt_flags);
+}
+
+/* Load the original GDT from the per-cpu structure */
+void load_direct_gdt(int cpu)
{
struct desc_ptr gdt_descr;
- gdt_descr.address = (long)get_cpu_gdt_table(cpu);
+ gdt_descr.address = (long)get_cpu_gdt_rw(cpu);
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
- /* Reload the per-cpu base */
+}
+EXPORT_SYMBOL_GPL(load_direct_gdt);
+
+/* Load a fixmap remapping of the per-cpu GDT */
+void load_fixmap_gdt(int cpu)
+{
+ struct desc_ptr gdt_descr;
+ gdt_descr.address = (long)get_cpu_gdt_ro(cpu);
+ gdt_descr.size = GDT_SIZE - 1;
+ load_gdt(&gdt_descr);
+}
+EXPORT_SYMBOL_GPL(load_fixmap_gdt);
+
+/*
+ * Current gdt points %fs at the "master" per-cpu area: after this,
+ * it's on the real one.
+ */
+void switch_to_new_gdt(int cpu)
+{
+ /* Load the original GDT */
+ load_direct_gdt(cpu);
+ /* Reload the per-cpu base */
load_percpu_segment(cpu);
}
if (is_uv_system())
uv_cpu_init();
+
+ setup_fixmap_gdt(cpu);
+ load_fixmap_gdt(cpu);
}
#else
dbg_restore_debug_regs();
fpu__init_cpu();
+
+ setup_fixmap_gdt(cpu);
+ load_fixmap_gdt(cpu);
}
#endif
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
+#include <asm/fixmap.h>
#if 0
#define DEBUGP(fmt, ...) \
pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
0x2 | DESCTYPE_S, 0x8);
gdt.s = 1;
- write_gdt_entry(get_cpu_gdt_table(cpu),
+ write_gdt_entry(get_cpu_gdt_rw(cpu),
GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
#endif
}
unsigned long timeout;
idle->thread.sp = (unsigned long)task_pt_regs(idle);
- early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+ early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
initial_code = (unsigned long)start_secondary;
initial_stack = idle->thread.sp;
#include <linux/uaccess.h>
#include <linux/elf.h>
+#include <asm/elf.h>
+#include <asm/compat.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
static void find_start_end(unsigned long flags, unsigned long *begin,
unsigned long *end)
{
- if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) {
+ if (!in_compat_syscall() && (flags & MAP_32BIT)) {
/* This is usually used needed to map code in small
model, so it needs to be in the first 31bit. Limit
it to that. This means we need to move the
if (current->flags & PF_RANDOMIZE) {
*begin = randomize_page(*begin, 0x02000000);
}
- } else {
- *begin = current->mm->mmap_legacy_base;
- *end = TASK_SIZE;
+ return;
}
+
+ *begin = get_mmap_base(1);
+ *end = in_compat_syscall() ? tasksize_32bit() : tasksize_64bit();
}
unsigned long
return addr;
/* for MAP_32BIT mappings we force the legacy mmap base */
- if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
+ if (!in_compat_syscall() && (flags & MAP_32BIT))
goto bottomup;
/* requesting a specific address */
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = PAGE_SIZE;
- info.high_limit = mm->mmap_base;
+ info.high_limit = get_mmap_base(0);
info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
if (filp) {
pgprot_t prot)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset(&tboot_mm, vaddr);
- pud = pud_alloc(&tboot_mm, pgd, vaddr);
+ p4d = p4d_alloc(&tboot_mm, pgd, vaddr);
+ if (!p4d)
+ return -1;
+ pud = pud_alloc(&tboot_mm, p4d, vaddr);
if (!pud)
return -1;
pmd = pmd_alloc(&tboot_mm, pud, vaddr);
struct vm_area_struct *vma;
spinlock_t *ptl;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset(mm, 0xA0000);
if (pgd_none_or_clear_bad(pgd))
goto out;
- pud = pud_offset(pgd, 0xA0000);
+ p4d = p4d_offset(pgd, 0xA0000);
+ if (p4d_none_or_clear_bad(p4d))
+ goto out;
+ pud = pud_offset(p4d, 0xA0000);
if (pud_none_or_clear_bad(pud))
goto out;
pmd = pmd_offset(pud, 0xA0000);
struct svm_cpu_data *sd;
uint64_t efer;
- struct desc_ptr gdt_descr;
struct desc_struct *gdt;
int me = raw_smp_processor_id();
sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
sd->next_asid = sd->max_asid + 1;
- native_store_gdt(&gdt_descr);
- gdt = (struct desc_struct *)gdt_descr.address;
+ gdt = get_current_gdt_rw();
sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
wrmsrl(MSR_EFER, efer | EFER_SVME);
* when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
*/
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
-static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
/*
* We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
*/
static unsigned long segment_base(u16 selector)
{
- struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
struct desc_struct *table;
unsigned long v;
if (!(selector & ~SEGMENT_RPL_MASK))
return 0;
- table = (struct desc_struct *)gdt->address;
+ table = get_current_gdt_ro();
if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
u16 ldt_selector = kvm_read_ldt();
#endif
if (vmx->host_state.msr_host_bndcfgs)
wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
- load_gdt(this_cpu_ptr(&host_gdt));
+ load_fixmap_gdt(raw_smp_processor_id());
}
static void vmx_load_host_state(struct vcpu_vmx *vmx)
}
if (!already_loaded) {
- struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+ unsigned long gdt = get_current_gdt_ro_vaddr();
unsigned long sysenter_esp;
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
*/
vmcs_writel(HOST_TR_BASE,
(unsigned long)this_cpu_ptr(&cpu_tss));
- vmcs_writel(HOST_GDTR_BASE, gdt->address);
+ vmcs_writel(HOST_GDTR_BASE, gdt); /* 22.2.4 */
/*
* VM exits change the host TR limit to 0x67 after a VM
ept_sync_global();
}
- native_store_gdt(this_cpu_ptr(&host_gdt));
-
return 0;
}
#include <asm/kasan.h>
#include <asm/pgtable.h>
+#include <asm/fixmap.h>
/*
* The dumper groups pagetable entries of the same type into one, and for
{
unsigned index = pgd_index(address);
pgd_t *pgd_k;
+ p4d_t *p4d, *p4d_k;
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
/*
* set_pgd(pgd, *pgd_k); here would be useless on PAE
* and redundant with the set_pmd() on non-PAE. As would
- * set_pud.
+ * set_p4d/set_pud.
*/
- pud = pud_offset(pgd, address);
- pud_k = pud_offset(pgd_k, address);
+ p4d = p4d_offset(pgd, address);
+ p4d_k = p4d_offset(pgd_k, address);
+ if (!p4d_present(*p4d_k))
+ return NULL;
+
+ pud = pud_offset(p4d, address);
+ pud_k = pud_offset(p4d_k, address);
if (!pud_present(*pud_k))
return NULL;
{
pgd_t *base = __va(read_cr3());
pgd_t *pgd = &base[pgd_index(address)];
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
goto out;
#endif
- pmd = pmd_offset(pud_offset(pgd, address), address);
+ p4d = p4d_offset(pgd, address);
+ pud = pud_offset(p4d, address);
+ pmd = pmd_offset(pud, address);
printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
/*
static noinline int vmalloc_fault(unsigned long address)
{
pgd_t *pgd, *pgd_ref;
+ p4d_t *p4d, *p4d_ref;
pud_t *pud, *pud_ref;
pmd_t *pmd, *pmd_ref;
pte_t *pte, *pte_ref;
if (pgd_none(*pgd)) {
set_pgd(pgd, *pgd_ref);
arch_flush_lazy_mmu_mode();
- } else {
+ } else if (CONFIG_PGTABLE_LEVELS > 4) {
+ /*
+ * With folded p4d, pgd_none() is always false, so the pgd may
+ * point to an empty page table entry and pgd_page_vaddr()
+ * will return garbage.
+ *
+ * We will do the correct sanity check on the p4d level.
+ */
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
}
+ /* With 4-level paging, copying happens on the p4d level. */
+ p4d = p4d_offset(pgd, address);
+ p4d_ref = p4d_offset(pgd_ref, address);
+ if (p4d_none(*p4d_ref))
+ return -1;
+
+ if (p4d_none(*p4d)) {
+ set_p4d(p4d, *p4d_ref);
+ arch_flush_lazy_mmu_mode();
+ } else {
+ BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_ref));
+ }
+
/*
* Below here mismatches are bugs because these lower tables
* are shared:
*/
- pud = pud_offset(pgd, address);
- pud_ref = pud_offset(pgd_ref, address);
+ pud = pud_offset(p4d, address);
+ pud_ref = pud_offset(p4d_ref, address);
if (pud_none(*pud_ref))
return -1;
{
pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
pgd_t *pgd = base + pgd_index(address);
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
if (!pgd_present(*pgd))
goto out;
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ if (bad_address(p4d))
+ goto bad;
+
+ printk("P4D %lx ", p4d_val(*p4d));
+ if (!p4d_present(*p4d) || p4d_large(*p4d))
+ goto out;
+
+ pud = pud_offset(p4d, address);
if (bad_address(pud))
goto bad;
spurious_fault(unsigned long error_code, unsigned long address)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
if (!pgd_present(*pgd))
return 0;
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ if (!p4d_present(*p4d))
+ return 0;
+
+ if (p4d_large(*p4d))
+ return spurious_fault_check(error_code, (pte_t *) p4d);
+
+ pud = pud_offset(p4d, address);
if (!pud_present(*pud))
return 0;
}
/*
- * 'pteval' can come from a pte, pmd or pud. We only check
+ * 'pteval' can come from a pte, pmd, pud or p4d. We only check
* _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the
- * same value on all 3 types.
+ * same value on all 4 types.
*/
static inline int pte_allows_gup(unsigned long pteval, int write)
{
return 1;
}
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
unsigned long next;
pud_t *pudp;
- pudp = pud_offset(&pgd, addr);
+ pudp = pud_offset(&p4d, addr);
do {
pud_t pud = *pudp;
return 1;
}
+static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ p4d_t *p4dp;
+
+ p4dp = p4d_offset(&pgd, addr);
+ do {
+ p4d_t p4d = *p4dp;
+
+ next = p4d_addr_end(addr, end);
+ if (p4d_none(p4d))
+ return 0;
+ BUILD_BUG_ON(p4d_large(p4d));
+ if (!gup_pud_range(p4d, addr, next, write, pages, nr))
+ return 0;
+ } while (p4dp++, addr = next, addr != end);
+
+ return 1;
+}
+
/*
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
* back to the regular GUP.
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
break;
- if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
break;
} while (pgdp++, addr = next, addr != end);
local_irq_restore(flags);
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
goto slow;
- if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
goto slow;
} while (pgdp++, addr = next, addr != end);
local_irq_enable();
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
+#include <linux/compat.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
+#include <asm/elf.h>
#if 0 /* This is just for testing */
struct page *
info.flags = 0;
info.length = len;
- info.low_limit = current->mm->mmap_legacy_base;
- info.high_limit = TASK_SIZE;
+ info.low_limit = get_mmap_base(1);
+ info.high_limit = in_compat_syscall() ?
+ tasksize_32bit() : tasksize_64bit();
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
return vm_unmapped_area(&info);
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = PAGE_SIZE;
- info.high_limit = current->mm->mmap_base;
+ info.high_limit = get_mmap_base(0);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
addr = vm_unmapped_area(&info);
return 0;
}
+static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next) {
+ p4d_t *p4d = p4d_page + p4d_index(addr);
+ pud_t *pud;
+
+ next = (addr & P4D_MASK) + P4D_SIZE;
+ if (next > end)
+ next = end;
+
+ if (p4d_present(*p4d)) {
+ pud = pud_offset(p4d, 0);
+ ident_pud_init(info, pud, addr, next);
+ continue;
+ }
+ pud = (pud_t *)info->alloc_pgt_page(info->context);
+ if (!pud)
+ return -ENOMEM;
+ ident_pud_init(info, pud, addr, next);
+ set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
+ }
+
+ return 0;
+}
+
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
unsigned long pstart, unsigned long pend)
{
for (; addr < end; addr = next) {
pgd_t *pgd = pgd_page + pgd_index(addr);
- pud_t *pud;
+ p4d_t *p4d;
next = (addr & PGDIR_MASK) + PGDIR_SIZE;
if (next > end)
next = end;
if (pgd_present(*pgd)) {
- pud = pud_offset(pgd, 0);
- result = ident_pud_init(info, pud, addr, next);
+ p4d = p4d_offset(pgd, 0);
+ result = ident_p4d_init(info, p4d, addr, next);
if (result)
return result;
continue;
}
- pud = (pud_t *)info->alloc_pgt_page(info->context);
- if (!pud)
+ p4d = (p4d_t *)info->alloc_pgt_page(info->context);
+ if (!p4d)
return -ENOMEM;
- result = ident_pud_init(info, pud, addr, next);
+ result = ident_p4d_init(info, p4d, addr, next);
if (result)
return result;
- set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+ } else {
+ /*
+ * With p4d folded, pgd is equal to p4d.
+ * The pgd entry has to point to the pud page table in this case.
+ */
+ pud_t *pud = pud_offset(p4d, 0);
+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
}
return 0;
*/
static pmd_t * __init one_md_table_init(pgd_t *pgd)
{
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd_table;
pmd_table = (pmd_t *)alloc_low_page();
paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
- pud = pud_offset(pgd, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
BUG_ON(pmd_table != pmd_offset(pud, 0));
return pmd_table;
}
#endif
- pud = pud_offset(pgd, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
pmd_table = pmd_offset(pud, 0);
return pmd_table;
static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
{
- return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
- vaddr), vaddr), vaddr);
+ pgd_t *pgd = pgd_offset_k(vaddr);
+ p4d_t *p4d = p4d_offset(pgd, vaddr);
+ pud_t *pud = pud_offset(p4d, vaddr);
+ pmd_t *pmd = pmd_offset(pud, vaddr);
+ return pte_offset_kernel(pmd, vaddr);
}
static void __init kmap_init(void)
{
unsigned long vaddr;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
pgd = swapper_pg_dir + pgd_index(vaddr);
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
pmd = pmd_offset(pud, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
pkmap_page_table = pte;
{
unsigned long pfn, va;
pgd_t *pgd, *base = swapper_pg_dir;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
if (!pgd_present(*pgd))
break;
- pud = pud_offset(pgd, va);
+ p4d = p4d_offset(pgd, va);
+ pud = pud_offset(p4d, va);
pmd = pmd_offset(pud, va);
if (!pmd_present(*pmd))
break;
/* Don't assume we're using swapper_pg_dir at this point */
pgd_t *base = __va(read_cr3());
pgd_t *pgd = &base[pgd_index(addr)];
- pud_t *pud = pud_offset(pgd, addr);
+ p4d_t *p4d = p4d_offset(pgd, addr);
+ pud_t *pud = pud_offset(p4d, addr);
pmd_t *pmd = pmd_offset(pud, addr);
return pmd;
#include <asm/tlbflush.h>
#include <asm/sections.h>
+#include <asm/fixmap.h>
extern pgd_t early_level4_pgt[PTRS_PER_PGD];
extern struct range pfn_mapped[E820_X_MAX];
#include <linux/limits.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
+#include <linux/compat.h>
#include <asm/elf.h>
struct va_alignment __read_mostly va_align = {
.flags = -1,
};
-static unsigned long stack_maxrandom_size(void)
+unsigned long tasksize_32bit(void)
+{
+ return IA32_PAGE_OFFSET;
+}
+
+unsigned long tasksize_64bit(void)
+{
+ return TASK_SIZE_MAX;
+}
+
+static unsigned long stack_maxrandom_size(unsigned long task_size)
{
unsigned long max = 0;
if ((current->flags & PF_RANDOMIZE) &&
!(current->personality & ADDR_NO_RANDOMIZE)) {
- max = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT;
+ max = (-1UL) & __STACK_RND_MASK(task_size == tasksize_32bit());
+ max <<= PAGE_SHIFT;
}
return max;
}
-/*
- * Top of mmap area (just below the process stack).
- *
- * Leave an at least ~128 MB hole with possible stack randomization.
- */
-#define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
-#define MAX_GAP (TASK_SIZE/6*5)
+#ifdef CONFIG_COMPAT
+# define mmap32_rnd_bits mmap_rnd_compat_bits
+# define mmap64_rnd_bits mmap_rnd_bits
+#else
+# define mmap32_rnd_bits mmap_rnd_bits
+# define mmap64_rnd_bits mmap_rnd_bits
+#endif
+
+#define SIZE_128M (128 * 1024 * 1024UL)
static int mmap_is_legacy(void)
{
return sysctl_legacy_va_layout;
}
-unsigned long arch_mmap_rnd(void)
+static unsigned long arch_rnd(unsigned int rndbits)
{
- unsigned long rnd;
-
- if (mmap_is_ia32())
-#ifdef CONFIG_COMPAT
- rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
-#else
- rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
-#endif
- else
- rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
+ return (get_random_long() & ((1UL << rndbits) - 1)) << PAGE_SHIFT;
+}
- return rnd << PAGE_SHIFT;
+unsigned long arch_mmap_rnd(void)
+{
+ if (!(current->flags & PF_RANDOMIZE))
+ return 0;
+ return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
}
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
{
unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long gap_min, gap_max;
+
+ /*
+ * Top of mmap area (just below the process stack).
+ * Leave an at least ~128 MB hole with possible stack randomization.
+ */
+ gap_min = SIZE_128M + stack_maxrandom_size(task_size);
+ gap_max = (task_size / 6) * 5;
- if (gap < MIN_GAP)
- gap = MIN_GAP;
- else if (gap > MAX_GAP)
- gap = MAX_GAP;
+ if (gap < gap_min)
+ gap = gap_min;
+ else if (gap > gap_max)
+ gap = gap_max;
+
+ return PAGE_ALIGN(task_size - gap - rnd);
+}
- return PAGE_ALIGN(TASK_SIZE - gap - rnd);
+static unsigned long mmap_legacy_base(unsigned long rnd,
+ unsigned long task_size)
+{
+ return __TASK_UNMAPPED_BASE(task_size) + rnd;
}
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
+static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
+ unsigned long random_factor, unsigned long task_size)
+{
+ *legacy_base = mmap_legacy_base(random_factor, task_size);
+ if (mmap_is_legacy())
+ *base = *legacy_base;
+ else
+ *base = mmap_base(random_factor, task_size);
+}
+
void arch_pick_mmap_layout(struct mm_struct *mm)
{
- unsigned long random_factor = 0UL;
+ if (mmap_is_legacy())
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ else
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- if (current->flags & PF_RANDOMIZE)
- random_factor = arch_mmap_rnd();
+ arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
+ arch_rnd(mmap64_rnd_bits), tasksize_64bit());
+
+#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
+ /*
+ * The mmap syscall mapping base decision depends solely on the
+ * syscall type (64-bit or compat). This applies for 64bit
+ * applications and 32bit applications. The 64bit syscall uses
+ * mmap_base, the compat syscall uses mmap_compat_base.
+ */
+ arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
+ arch_rnd(mmap32_rnd_bits), tasksize_32bit());
+#endif
+}
- mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;
+unsigned long get_mmap_base(int is_legacy)
+{
+ struct mm_struct *mm = current->mm;
- if (mmap_is_legacy()) {
- mm->mmap_base = mm->mmap_legacy_base;
- mm->get_unmapped_area = arch_get_unmapped_area;
- } else {
- mm->mmap_base = mmap_base(random_factor);
- mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
+ if (in_compat_syscall()) {
+ return is_legacy ? mm->mmap_compat_legacy_base
+ : mm->mmap_compat_base;
}
+#endif
+ return is_legacy ? mm->mmap_legacy_base : mm->mmap_base;
}
const char *arch_vma_name(struct vm_area_struct *vma)
static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
{
+ p4d_t *p4d;
pud_t *pud;
int i;
if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
return;
- pud = pud_offset(pgd, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
pmd_t *pmd = pmds[i];
void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
BUG();
return;
}
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ if (p4d_none(*p4d)) {
+ BUG();
+ return;
+ }
+ pud = pud_offset(p4d, vaddr);
if (pud_none(*pud)) {
BUG();
return;
load_cr3(initial_page_table);
__flush_tlb_all();
- gdt_descr.address = __pa(get_cpu_gdt_table(0));
+ gdt_descr.address = __pa(get_cpu_gdt_rw(0));
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
{
struct desc_ptr gdt_descr;
- gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
+ gdt_descr.address = (unsigned long)get_cpu_gdt_rw(0);
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
{
unsigned num_entries;
pgd_t *pgd_k, *pgd_efi;
+ p4d_t *p4d_k, *p4d_efi;
pud_t *pud_k, *pud_efi;
if (efi_enabled(EFI_OLD_MEMMAP))
BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0);
pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
- pud_efi = pud_offset(pgd_efi, 0);
+ p4d_efi = p4d_offset(pgd_efi, 0);
+ pud_efi = pud_offset(p4d_efi, 0);
pgd_k = pgd_offset_k(EFI_VA_END);
- pud_k = pud_offset(pgd_k, 0);
+ p4d_k = p4d_offset(pgd_k, 0);
+ pud_k = pud_offset(p4d_k, 0);
num_entries = pud_index(EFI_VA_END);
memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
- pud_efi = pud_offset(pgd_efi, EFI_VA_START);
- pud_k = pud_offset(pgd_k, EFI_VA_START);
+ p4d_efi = p4d_offset(pgd_efi, EFI_VA_START);
+ pud_efi = pud_offset(p4d_efi, EFI_VA_START);
+ p4d_k = p4d_offset(pgd_k, EFI_VA_START);
+ pud_k = pud_offset(p4d_k, EFI_VA_START);
num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START);
memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
* 'pmode_gdt' in wakeup_start.
*/
ctxt->gdt_desc.size = GDT_SIZE - 1;
- ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_table(smp_processor_id());
+ ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_rw(smp_processor_id());
store_tr(ctxt->tr);
int cpu = smp_processor_id();
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
#ifdef CONFIG_X86_64
- struct desc_struct *desc = get_cpu_gdt_table(cpu);
+ struct desc_struct *desc = get_cpu_gdt_rw(cpu);
tss_desc tss;
#endif
set_tss_desc(cpu, t); /*
load_mm_ldt(current->active_mm); /* This does lldt */
fpu__resume_cpu();
+
+ /* The processor is back on the direct GDT, load back the fixmap */
+ load_fixmap_gdt(cpu);
}
/**
*/
static pmd_t *resume_one_md_table_init(pgd_t *pgd)
{
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd_table;
return NULL;
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
- pud = pud_offset(pgd, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
BUG_ON(pmd_table != pmd_offset(pud, 0));
#else
- pud = pud_offset(pgd, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
pmd_table = pmd_offset(pud, 0);
#endif
{
pmd_t *pmd;
pud_t *pud;
+ p4d_t *p4d;
/*
* The new mapping only has to cover the page containing the image
* the virtual address space after switching over to the original page
* tables used by the image kernel.
*/
+
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ p4d = (p4d_t *)get_safe_page(GFP_ATOMIC);
+ if (!p4d)
+ return -ENOMEM;
+ }
+
pud = (pud_t *)get_safe_page(GFP_ATOMIC);
if (!pud)
return -ENOMEM;
__pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
set_pud(pud + pud_index(restore_jump_address),
__pud(__pa(pmd) | _KERNPG_TABLE));
- set_pgd(pgd + pgd_index(restore_jump_address),
- __pgd(__pa(pud) | _KERNPG_TABLE));
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE));
+ set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE));
+ } else {
+ /* No p4d for 4-level paging: point the pgd to the pud page table */
+ set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
return 0;
}
static int relocate_restore_code(void)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
relocated_restore_code = get_safe_page(GFP_ATOMIC);
if (!relocated_restore_code)
/* Make the page containing the relocated code executable */
pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code);
- pud = pud_offset(pgd, relocated_restore_code);
+ p4d = p4d_offset(pgd, relocated_restore_code);
+ if (p4d_large(*p4d)) {
+ set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
+ goto out;
+ }
+ pud = pud_offset(p4d, relocated_restore_code);
if (pud_large(*pud)) {
set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
- } else {
- pmd_t *pmd = pmd_offset(pud, relocated_restore_code);
-
- if (pmd_large(*pmd)) {
- set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
- } else {
- pte_t *pte = pte_offset_kernel(pmd, relocated_restore_code);
-
- set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
- }
+ goto out;
+ }
+ pmd = pmd_offset(pud, relocated_restore_code);
+ if (pmd_large(*pmd)) {
+ set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
+ goto out;
}
+ pte = pte_offset_kernel(pmd, relocated_restore_code);
+ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
+out:
__flush_tlb_all();
-
return 0;
}
*shadow = t->tls_array[i];
- gdt = get_cpu_gdt_table(cpu);
+ gdt = get_cpu_gdt_rw(cpu);
maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
mc = __xen_mc_entry(0);
*/
xen_initial_gdt = &per_cpu(gdt_page, 0);
+ /* GDT can only be remapped RO */
+ pg_fixmap_gdt_flags = PAGE_KERNEL_RO;
+
xen_smp_init();
#ifdef CONFIG_ACPI_NUMA
#endif
case FIX_TEXT_POKE0:
case FIX_TEXT_POKE1:
+ case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
/* All local page mappings */
pte = pfn_pte(phys, prot);
break;
if (ctxt == NULL)
return -ENOMEM;
- gdt = get_cpu_gdt_table(cpu);
+ gdt = get_cpu_gdt_rw(cpu);
#ifdef CONFIG_X86_32
ctxt->user_regs.fs = __KERNEL_PERCPU;
* byte, not the size, hence the "-1").
*/
state->host_gdt_desc.size = GDT_SIZE-1;
- state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
+ state->host_gdt_desc.address = (long)get_cpu_gdt_rw(i);
/*
* All CPUs on the Host use the same Interrupt Descriptor
* The Host needs to be able to use the LGUEST segments on this
* CPU, too, so put them in the Host GDT.
*/
- get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
- get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
+ get_cpu_gdt_rw(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
+ get_cpu_gdt_rw(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
}
/*
#define Q2_SET_SEL(cpu, selname, address, size) \
do { \
- struct desc_struct *gdt = get_cpu_gdt_table((cpu)); \
+ struct desc_struct *gdt = get_cpu_gdt_rw((cpu)); \
set_desc_base(&gdt[(selname) >> 3], (u32)(address)); \
set_desc_limit(&gdt[(selname) >> 3], (size) - 1); \
} while(0)
return PNP_FUNCTION_NOT_SUPPORTED;
cpu = get_cpu();
- save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
- get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
+ save_desc_40 = get_cpu_gdt_rw(cpu)[0x40 / 8];
+ get_cpu_gdt_rw(cpu)[0x40 / 8] = bad_bios_desc;
/* On some boxes IRQ's during PnP BIOS calls are deadly. */
spin_lock_irqsave(&pnp_bios_lock, flags);
:"memory");
spin_unlock_irqrestore(&pnp_bios_lock, flags);
- get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
+ get_cpu_gdt_rw(cpu)[0x40 / 8] = save_desc_40;
put_cpu();
/* If we get here and this is set then the PnP BIOS faulted on us. */
pnp_bios_callpoint.segment = PNP_CS16;
for_each_possible_cpu(i) {
- struct desc_struct *gdt = get_cpu_gdt_table(i);
+ struct desc_struct *gdt = get_cpu_gdt_rw(i);
if (!gdt)
continue;
set_desc_base(&gdt[GDT_ENTRY_PNPBIOS_CS32],
#endif
unsigned long mmap_base; /* base of mmap area */
unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */
+#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
+ /* Base adresses for compatible mmap() */
+ unsigned long mmap_compat_base;
+ unsigned long mmap_compat_legacy_base;
+#endif
unsigned long task_size; /* size of task vm space */
unsigned long highest_vm_end; /* highest vma end address */
pgd_t * pgd;
#include <asm/tlbflush.h>
#include <asm/shmparam.h>
+#ifdef CONFIG_X86
+# include <asm/fixmap.h>
+#endif
+
#include "internal.h"
struct vfree_deferred {