[mirror_ubuntu-hirsute-kernel.git] / arch / x86 / kernel / ldt.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002 Andi Kleen
 *
 * This handles calls from both 32bit and 64bit mode.
 *
 * Lock order:
 *	context.ldt_usr_sem
 *	  mmap_lock
 *	    context.lock
 */

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>

#include <asm/ldt.h>
#include <asm/tlb.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
#include <asm/pgtable_areas.h>

#include <xen/xen.h>

/* This is a multiple of PAGE_SIZE. */
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)

/*
 * Return the virtual address of LDT alias slot @slot: LDT_BASE_ADDR plus
 * @slot strides of LDT_SLOT_STRIDE bytes.
 */
static inline void *ldt_slot_va(int slot)
{
	unsigned long slot_offset = LDT_SLOT_STRIDE * slot;

	return (void *)(LDT_BASE_ADDR + slot_offset);
}

/*
 * Load the LDT register for @mm on the current CPU: point it at the mm's
 * LDT if there is one, otherwise clear it.
 */
void load_mm_ldt(struct mm_struct *mm)
{
	struct ldt_struct *ldt;

	/* READ_ONCE synchronizes with smp_store_release */
	ldt = READ_ONCE(mm->context.ldt);

	/*
	 * Any change to mm->context.ldt is followed by an IPI to all
	 * CPUs with the mm active.  The LDT will not be freed until
	 * after the IPI is handled by all such CPUs.  This means that,
	 * if the ldt_struct changes before we return, the values we see
	 * will be safe, and the new values will be loaded before we run
	 * any user code.
	 *
	 * NB: don't try to convert this to use RCU without extreme care.
	 * We would still need IRQs off, because we don't want to change
	 * the local LDT after an IPI loaded a newer value than the one
	 * that we can see.
	 */

	/* Common case: this mm has no LDT at all. */
	if (likely(!ldt)) {
		clear_LDT();
		return;
	}

	/* Without PTI the descriptor array can be used directly. */
	if (!static_cpu_has(X86_FEATURE_PTI)) {
		set_ldt(ldt->entries, ldt->nr_entries);
		return;
	}

	if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
		/*
		 * Whoops -- either the new LDT isn't mapped
		 * (if slot == -1) or is mapped into a bogus
		 * slot (if slot > 1).
		 */
		clear_LDT();
		return;
	}

	/*
	 * If page table isolation is enabled, ldt->entries
	 * will not be mapped in the userspace pagetables.
	 * Tell the CPU to access the LDT through the alias
	 * at ldt_slot_va(ldt->slot).
	 */
	set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
}

/*
 * Reload the LDT register when switching from @prev to @next, but only
 * if at least one of the two mms has an LDT.
 */
void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
	unsigned long prev_ldt = (unsigned long)prev->context.ldt;
	unsigned long next_ldt = (unsigned long)next->context.ldt;

	/*
	 * Load the LDT if either the old or new mm had an LDT.
	 *
	 * An mm will never go from having an LDT to not having an LDT.  Two
	 * mms never share an LDT, so we don't gain anything by checking to
	 * see whether the LDT changed.  There's also no guarantee that
	 * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
	 * then prev->context.ldt will also be non-NULL.
	 *
	 * If we really cared, we could optimize the case where prev == next
	 * and we're exiting lazy mode.  Most of the time, if this happens,
	 * we don't actually need to reload LDTR, but modify_ldt() is mostly
	 * used by legacy code and emulators where we don't need this level of
	 * performance.
	 *
	 * This uses | instead of || because it generates better code.
	 */
	if (unlikely(prev_ldt | next_ldt))
		load_mm_ldt(next);

	DEBUG_LOCKS_WARN_ON(preemptible());
}

/*
 * On 64-bit, reload DS and ES if their selectors reference the LDT, so
 * that the cached descriptors match the updated LDT.  No-op otherwise.
 */
static void refresh_ldt_segments(void)
{
#ifdef CONFIG_X86_64
	unsigned short ds_sel, es_sel;

	/* DS: reload only when the selector's table indicator says LDT. */
	savesegment(ds, ds_sel);
	if ((ds_sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
		loadsegment(ds, ds_sel);

	/* ES: same treatment. */
	savesegment(es, es_sel);
	if ((es_sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
		loadsegment(es, es_sel);
#endif
}

/*
 * SMP function-call handler: reload the LDT on this CPU if it currently
 * has @__mm loaded.
 *
 * context.lock is held by the task which issued the smp function call.
 */
static void flush_ldt(void *__mm)
{
	struct mm_struct *mm = __mm;

	/* CPUs running a different mm have nothing to refresh. */
	if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm) {
		load_mm_ldt(mm);
		refresh_ldt_segments();
	}
}

/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
{
	struct ldt_struct *new_ldt;
	unsigned int alloc_size;

	if (num_entries > LDT_ENTRIES)
		return NULL;

	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
	if (!new_ldt)
		return NULL;

	BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
	alloc_size = num_entries * LDT_ENTRY_SIZE;

	/*
	 * Xen is very picky: it requires a page-aligned LDT that has no
	 * trailing nonzero bytes in any page that contains LDT descriptors.
	 * Keep it simple: zero the whole allocation and never allocate less
	 * than PAGE_SIZE.
	 */
	if (alloc_size > PAGE_SIZE)
		new_ldt->entries = vzalloc(alloc_size);
	else
		new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL);

	if (!new_ldt->entries) {
		kfree(new_ldt);
		return NULL;
	}

	/* The new LDT isn't aliased for PTI yet. */
	new_ldt->slot = -1;

	new_ldt->nr_entries = num_entries;
	return new_ldt;
}

#ifdef CONFIG_PAGE_TABLE_ISOLATION

/*
 * Warn if the observed LDT page-table state disagrees with whether @mm
 * already has an LDT.
 *
 * If the mm already has an LDT, the kernel mapping (and, with PTI, the
 * user mapping) is expected to be present.  If this is the first LDT
 * for the mm, neither is expected to be present yet.
 */
static void do_sanity_check(struct mm_struct *mm,
			    bool had_kernel_mapping,
			    bool had_user_mapping)
{
	bool has_ldt = mm->context.ldt != NULL;

	/* The kernel-side mapping must exist exactly when an LDT exists. */
	WARN_ON(had_kernel_mapping != has_ldt);

	/* The user-side mapping is only meaningful with PTI. */
	if (boot_cpu_has(X86_FEATURE_PTI))
		WARN_ON(had_user_mapping != has_ldt);
}

#ifdef CONFIG_X86_PAE

/*
 * Walk from @pgd down to the PMD entry covering @va.
 *
 * Returns NULL if the PGD entry is zero or if the P4D or PUD entry on
 * the way is none; otherwise returns the pmd_offset() result.
 */
static pmd_t *pgd_to_pmd_walk(pgd_t *pgd, unsigned long va)
{
	p4d_t *p4dp;
	pud_t *pudp;

	if (!pgd->pgd)
		return NULL;

	p4dp = p4d_offset(pgd, va);
	if (p4d_none(*p4dp))
		return NULL;

	pudp = pud_offset(p4dp, va);
	return pud_none(*pudp) ? NULL : pmd_offset(pudp, va);
}

/*
 * PAE variant: copy the kernel PMD entry covering LDT_BASE_ADDR into
 * the user page table.  This is done only with PTI enabled and only
 * while @mm has no LDT installed yet.
 *
 * pgd_to_pmd_walk() returns NULL when an upper-level entry is missing,
 * so warn and bail out instead of dereferencing a NULL pointer.
 */
static void map_ldt_struct_to_user(struct mm_struct *mm)
{
	pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
	pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
	pmd_t *k_pmd, *u_pmd;

	/* Nothing to propagate without PTI or once an LDT is installed. */
	if (!boot_cpu_has(X86_FEATURE_PTI) || mm->context.ldt)
		return;

	k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
	u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);

	if (WARN_ON_ONCE(!k_pmd || !u_pmd))
		return;

	set_pmd(u_pmd, *k_pmd);
}

/*
 * PAE variant: look up the kernel and user PMD entries covering
 * LDT_BASE_ADDR and hand their presence to do_sanity_check().
 *
 * pgd_to_pmd_walk() returns NULL when an upper-level entry is missing;
 * that is treated as "no mapping" rather than dereferenced.
 */
static void sanity_check_ldt_mapping(struct mm_struct *mm)
{
	pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
	pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
	bool had_kernel, had_user;
	pmd_t *k_pmd, *u_pmd;

	k_pmd      = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
	u_pmd      = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
	had_kernel = k_pmd && (k_pmd->pmd != 0);
	had_user   = u_pmd && (u_pmd->pmd != 0);

	do_sanity_check(mm, had_kernel, had_user);
}

#else /* !CONFIG_X86_PAE */

/*
 * Non-PAE variant: copy the kernel PGD entry covering LDT_BASE_ADDR
 * into the user page table.  This is done only with PTI enabled and
 * only while @mm has no LDT installed yet.
 */
static void map_ldt_struct_to_user(struct mm_struct *mm)
{
	pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);

	if (!boot_cpu_has(X86_FEATURE_PTI) || mm->context.ldt)
		return;

	set_pgd(kernel_to_user_pgdp(k_pgd), *k_pgd);
}

/*
 * Non-PAE variant: report whether the kernel and user PGD entries
 * covering LDT_BASE_ADDR are non-zero to do_sanity_check().
 */
static void sanity_check_ldt_mapping(struct mm_struct *mm)
{
	pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
	pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);

	do_sanity_check(mm, k_pgd->pgd != 0, u_pgd->pgd != 0);
}

#endif /* CONFIG_X86_PAE */

/*
 * If PTI is enabled, this maps the LDT into the kernelmode and
 * usermode tables for the given mm.
 *
 * @mm:   mm whose page tables receive the alias mapping
 * @ldt:  LDT to map; its ->slot is expected to still be -1
 * @slot: alias slot to map into; the address comes from ldt_slot_va()
 *
 * Returns 0 on success, or immediately when PTI is not enabled.
 * Returns -ENOMEM if get_locked_pte() fails; pages mapped by earlier
 * loop iterations are not undone here.  ->slot is only set on success.
 */
static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
{
	unsigned long va;
	bool is_vmalloc;
	spinlock_t *ptl;
	int i, nr_pages;

	if (!boot_cpu_has(X86_FEATURE_PTI))
		return 0;

	/*
	 * Any given ldt_struct should have map_ldt_struct() called at most
	 * once.
	 */
	WARN_ON(ldt->slot != -1);

	/* Check if the current mappings are sane */
	sanity_check_ldt_mapping(mm);

	/* Selects how each source page is translated to a pfn below. */
	is_vmalloc = is_vmalloc_addr(ldt->entries);

	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);

	/* Install one PTE per page of descriptors. */
	for (i = 0; i < nr_pages; i++) {
		unsigned long offset = i << PAGE_SHIFT;
		const void *src = (char *)ldt->entries + offset;
		unsigned long pfn;
		pgprot_t pte_prot;
		pte_t pte, *ptep;

		va = (unsigned long)ldt_slot_va(slot) + offset;
		pfn = is_vmalloc ? vmalloc_to_pfn(src) :
			page_to_pfn(virt_to_page(src));
		/*
		 * Treat the PTI LDT range as a *userspace* range.
		 * get_locked_pte() will allocate all needed pagetables
		 * and account for them in this mm.
		 */
		ptep = get_locked_pte(mm, va, &ptl);
		if (!ptep)
			return -ENOMEM;
		/*
		 * Map it RO so the easy to find address is not a primary
		 * target via some kernel interface which misses a
		 * permission check.
		 */
		pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
		/* Filter out unsupported __PAGE_KERNEL* bits: */
		pgprot_val(pte_prot) &= __supported_pte_mask;
		pte = pfn_pte(pfn, pte_prot);
		set_pte_at(mm, va, ptep, pte);
		pte_unmap_unlock(ptep, ptl);
	}

	/* Propagate LDT mapping to the user page-table */
	map_ldt_struct_to_user(mm);

	/* Record the slot only after every page has been mapped. */
	ldt->slot = slot;
	return 0;
}

/*
 * Remove the PTI alias mapping of @ldt from @mm's page tables and flush
 * the TLB for the alias range.
 *
 * Does nothing if @ldt is NULL or PTI is not enabled.
 *
 * get_locked_pte() can return NULL; in that case warn and skip the page
 * instead of passing a NULL PTE pointer to pte_clear() and
 * pte_unmap_unlock().
 */
static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
{
	unsigned long va;
	int i, nr_pages;

	if (!ldt)
		return;

	/* LDT map/unmap is only required for PTI */
	if (!boot_cpu_has(X86_FEATURE_PTI))
		return;

	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);

	for (i = 0; i < nr_pages; i++) {
		unsigned long offset = i << PAGE_SHIFT;
		spinlock_t *ptl;
		pte_t *ptep;

		va = (unsigned long)ldt_slot_va(ldt->slot) + offset;
		ptep = get_locked_pte(mm, va, &ptl);
		if (!WARN_ON(!ptep)) {
			pte_clear(mm, va, ptep);
			pte_unmap_unlock(ptep, ptl);
		}
	}

	/* Flush the whole alias range that was just cleared. */
	va = (unsigned long)ldt_slot_va(ldt->slot);
	flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
}

#else /* !CONFIG_PAGE_TABLE_ISOLATION */

/*
 * Stub used when CONFIG_PAGE_TABLE_ISOLATION is not set: nothing is
 * mapped and success (0) is always returned.
 */
static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
{
	return 0;
}

/* Stub used when CONFIG_PAGE_TABLE_ISOLATION is not set: does nothing. */
static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
{
}
#endif /* CONFIG_PAGE_TABLE_ISOLATION */

/*
 * Free the page tables covering the LDT alias range
 * [LDT_BASE_ADDR, LDT_END_ADDR) of @mm.  Only does work when
 * CONFIG_PAGE_TABLE_ISOLATION is set and PTI is enabled.
 */
static void free_ldt_pgtables(struct mm_struct *mm)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	struct mmu_gather tlb;

	if (!boot_cpu_has(X86_FEATURE_PTI))
		return;

	tlb_gather_mmu(&tlb, mm, LDT_BASE_ADDR, LDT_END_ADDR);
	free_pgd_range(&tlb, LDT_BASE_ADDR, LDT_END_ADDR,
		       LDT_BASE_ADDR, LDT_END_ADDR);
	tlb_finish_mmu(&tlb, LDT_BASE_ADDR, LDT_END_ADDR);
#endif
}

/*
 * Hand the descriptor array of @ldt to paravirt_alloc_ldt().
 *
 * After calling this, the LDT is immutable.
 */
static void finalize_ldt_struct(struct ldt_struct *ldt)
{
	paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
}

/*
 * Publish @ldt as the LDT of @mm and make every CPU in mm_cpumask(@mm)
 * run flush_ldt(), waiting for them to finish.  The whole sequence runs
 * under mm->context.lock.
 */
static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
{
	mutex_lock(&mm->context.lock);

	/* Synchronizes with READ_ONCE in load_mm_ldt. */
	smp_store_release(&mm->context.ldt, ldt);

	/* Activate the LDT for all CPUs using this mm. */
	on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);

	mutex_unlock(&mm->context.lock);
}

/*
 * Release @ldt and its descriptor memory.  A NULL @ldt is a no-op.
 *
 * The descriptor memory is freed with the counterpart of whichever
 * allocator matches its size: vmalloc memory above PAGE_SIZE, a single
 * page otherwise.
 */
static void free_ldt_struct(struct ldt_struct *ldt)
{
	unsigned int entries_bytes;

	if (likely(!ldt))
		return;

	paravirt_free_ldt(ldt->entries, ldt->nr_entries);

	entries_bytes = ldt->nr_entries * LDT_ENTRY_SIZE;
	if (entries_bytes <= PAGE_SIZE)
		free_page((unsigned long)ldt->entries);
	else
		vfree_atomic(ldt->entries);

	kfree(ldt);
}

/*
 * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
 * the new task is not running, so nothing can be installed.
 *
 * Returns 0 on success (including when @old_mm is NULL or has no LDT),
 * -ENOMEM if the copy cannot be allocated, or the error returned by
 * map_ldt_struct().
 */
int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
{
	struct ldt_struct *parent_ldt, *new_ldt;
	int retval = 0;

	if (!old_mm)
		return 0;

	mutex_lock(&old_mm->context.lock);

	parent_ldt = old_mm->context.ldt;
	if (!parent_ldt)
		goto out_unlock;

	new_ldt = alloc_ldt_struct(parent_ldt->nr_entries);
	if (!new_ldt) {
		retval = -ENOMEM;
		goto out_unlock;
	}

	memcpy(new_ldt->entries, parent_ldt->entries,
	       new_ldt->nr_entries * LDT_ENTRY_SIZE);
	finalize_ldt_struct(new_ldt);

	retval = map_ldt_struct(mm, new_ldt, 0);
	if (!retval) {
		mm->context.ldt = new_ldt;
	} else {
		/* Drop any partially populated page tables and the copy. */
		free_ldt_pgtables(mm);
		free_ldt_struct(new_ldt);
	}

out_unlock:
	mutex_unlock(&old_mm->context.lock);
	return retval;
}

/*
 * Free the LDT of @mm and clear the pointer.
 *
 * No need to lock the MM as we are the last user
 *
 * 64bit: Don't touch the LDT register - we're already in the next thread.
 */
void destroy_context_ldt(struct mm_struct *mm)
{
	struct ldt_struct *doomed = mm->context.ldt;

	free_ldt_struct(doomed);
	mm->context.ldt = NULL;
}

/* Free the LDT alias page tables of @mm via free_ldt_pgtables(). */
void ldt_arch_exit_mmap(struct mm_struct *mm)
{
	free_ldt_pgtables(mm);
}

/*
 * Copy the current mm's LDT to the user buffer @ptr.
 *
 * @bytecount is clamped to LDT_ENTRY_SIZE * LDT_ENTRIES.  If the LDT is
 * smaller than the (clamped) request, the remainder of the buffer is
 * zero-filled.
 *
 * Returns 0 if the mm has no LDT, the clamped @bytecount on success, or
 * -EFAULT if writing to user memory fails.
 */
static int read_ldt(void __user *ptr, unsigned long bytecount)
{
	struct mm_struct *mm = current->mm;
	struct ldt_struct *ldt;
	unsigned long ldt_bytes;
	int retval = 0;

	down_read(&mm->context.ldt_usr_sem);

	ldt = mm->context.ldt;
	if (!ldt)
		goto out_unlock;

	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;

	/* Copy no more than the LDT holds and no more than was asked for. */
	ldt_bytes = ldt->nr_entries * LDT_ENTRY_SIZE;
	if (ldt_bytes > bytecount)
		ldt_bytes = bytecount;

	retval = -EFAULT;
	if (copy_to_user(ptr, ldt->entries, ldt_bytes))
		goto out_unlock;

	/* Zero-fill the rest and pretend we read bytecount bytes. */
	if (ldt_bytes != bytecount &&
	    clear_user(ptr + ldt_bytes, bytecount - ldt_bytes))
		goto out_unlock;

	retval = bytecount;

out_unlock:
	up_read(&mm->context.ldt_usr_sem);
	return retval;
}

/*
 * Zero up to @bytecount bytes of the user buffer @ptr, capped at a fixed
 * size (5 descriptors on 32-bit, 128 bytes otherwise).
 *
 * Returns the number of bytes cleared, or -EFAULT if clear_user() fails.
 */
static int read_default_ldt(void __user *ptr, unsigned long bytecount)
{
	/* CHECKME: Can we use _one_ random number ? */
#ifdef CONFIG_X86_32
	unsigned long size = 5 * sizeof(struct desc_struct);
#else
	unsigned long size = 128;
#endif
	unsigned long len = bytecount;

	if (len > size)
		len = size;

	return clear_user(ptr, len) ? -EFAULT : len;
}

/*
 * Report whether 16-bit segments may be installed: requires
 * CONFIG_X86_16BIT and, with CONFIG_XEN_PV, not running as a Xen PV
 * domain.
 */
static bool allow_16bit_segments(void)
{
	bool allowed = IS_ENABLED(CONFIG_X86_16BIT);

#ifdef CONFIG_XEN_PV
	/*
	 * Xen PV does not implement ESPFIX64, which means that 16-bit
	 * segments will not work correctly.  Until either Xen PV implements
	 * ESPFIX64 and can signal this fact to the guest or unless someone
	 * provides compelling evidence that allowing broken 16-bit segments
	 * is worthwhile, disallow 16-bit segments under Xen PV.
	 */
	if (allowed && xen_pv_domain()) {
		pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n");
		allowed = false;
	}
#endif

	return allowed;
}

/*
 * Set or clear one LDT entry of the current mm.
 *
 * @ptr:       user pointer to a struct user_desc describing the entry
 * @bytecount: must equal sizeof(struct user_desc)
 * @oldmode:   nonzero selects the legacy behaviour: contents == 3 is
 *             rejected, a zero base and limit clears the entry, and the
 *             AVL bit is forced to 0
 *
 * A new ldt_struct is always allocated, populated from the old one,
 * mapped, installed, and then the old one is unmapped and freed.
 *
 * Returns 0 on success, -EINVAL for a bad size or descriptor, -EFAULT
 * if the user copy fails, -EINTR if taking ldt_usr_sem is interrupted,
 * -ENOMEM if allocation fails, or the error from map_ldt_struct().
 */
static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
{
	struct mm_struct *mm = current->mm;
	struct ldt_struct *new_ldt, *old_ldt;
	unsigned int old_nr_entries, new_nr_entries;
	struct user_desc ldt_info;
	struct desc_struct ldt;
	int error;

	error = -EINVAL;
	if (bytecount != sizeof(ldt_info))
		goto out;
	error = -EFAULT;
	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
		goto out;

	error = -EINVAL;
	if (ldt_info.entry_number >= LDT_ENTRIES)
		goto out;
	/* contents == 3 is only accepted in new mode, marked not-present. */
	if (ldt_info.contents == 3) {
		if (oldmode)
			goto out;
		if (ldt_info.seg_not_present == 0)
			goto out;
	}

	if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
	    LDT_empty(&ldt_info)) {
		/* The user wants to clear the entry. */
		memset(&ldt, 0, sizeof(ldt));
	} else {
		if (!ldt_info.seg_32bit && !allow_16bit_segments()) {
			error = -EINVAL;
			goto out;
		}

		fill_ldt(&ldt, &ldt_info);
		if (oldmode)
			ldt.avl = 0;
	}

	/* From here on the LDT is modified; exclude other readers/writers. */
	if (down_write_killable(&mm->context.ldt_usr_sem))
		return -EINTR;

	old_ldt       = mm->context.ldt;
	old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
	/* The new LDT never shrinks below the old one. */
	new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);

	error = -ENOMEM;
	new_ldt = alloc_ldt_struct(new_nr_entries);
	if (!new_ldt)
		goto out_unlock;

	if (old_ldt)
		memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);

	new_ldt->entries[ldt_info.entry_number] = ldt;
	finalize_ldt_struct(new_ldt);

	/*
	 * If we are using PTI, map the new LDT into the userspace pagetables.
	 * If there is already an LDT, use the other slot so that other CPUs
	 * will continue to use the old LDT until install_ldt() switches
	 * them over to the new LDT.
	 */
	error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
	if (error) {
		/*
		 * This only can fail for the first LDT setup. If an LDT is
		 * already installed then the PTE page is already
		 * populated. Mop up a half populated page table.
		 */
		if (!WARN_ON_ONCE(old_ldt))
			free_ldt_pgtables(mm);
		free_ldt_struct(new_ldt);
		goto out_unlock;
	}

	/* Switch over first, then tear down the old LDT's mapping and memory. */
	install_ldt(mm, new_ldt);
	unmap_ldt_struct(mm, old_ldt);
	free_ldt_struct(old_ldt);
	error = 0;

out_unlock:
	up_write(&mm->context.ldt_usr_sem);
out:
	return error;
}

/*
 * modify_ldt() system call: dispatch on @func to read the LDT (0),
 * write an entry in legacy mode (1), read the default LDT (2) or write
 * an entry in new mode (0x11).  Any other @func yields -ENOSYS.
 */
SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
		unsigned long , bytecount)
{
	int ret;

	switch (func) {
	case 0:
		ret = read_ldt(ptr, bytecount);
		break;
	case 1:
		ret = write_ldt(ptr, bytecount, 1);
		break;
	case 2:
		ret = read_default_ldt(ptr, bytecount);
		break;
	case 0x11:
		ret = write_ldt(ptr, bytecount, 0);
		break;
	default:
		ret = -ENOSYS;
		break;
	}

	/*
	 * The SYSCALL_DEFINE() macros give us an 'unsigned long'
	 * return type, but the ABI for sys_modify_ldt() expects
	 * 'int'.  This cast gives us an int-sized value in %rax
	 * for the return code.  The 'unsigned' is necessary so
	 * the compiler does not try to sign-extend the negative
	 * return codes into the high half of the register when
	 * taking the value from int->long.
	 */
	return (unsigned int)ret;
}
Commit	Line	Data
b2441318	1	// SPDX-License-Identifier: GPL-2.0
1da177e4	2	/*
1da177e4 LT	3	* Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
	4	* Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
	5	* Copyright (C) 2002 Andi Kleen
78aa1f66	6	*
1da177e4	7	* This handles calls from both 32bit and 64bit mode.
c2b3496b PZ	8	*
	9	* Lock order:
	10	* contex.ldt_usr_sem
c1e8d7c6	11	* mmap_lock
c2b3496b	12	* context.lock
1da177e4 LT	13	*/
	14
	15	#include <linux/errno.h>
5a0e3ad6	16	#include <linux/gfp.h>
1da177e4 LT	17	#include <linux/sched.h>
	18	#include <linux/string.h>
	19	#include <linux/mm.h>
	20	#include <linux/smp.h>
da20ab35	21	#include <linux/syscalls.h>
37868fe1	22	#include <linux/slab.h>
1da177e4	23	#include <linux/vmalloc.h>
423a5405	24	#include <linux/uaccess.h>
1da177e4	25
1da177e4	26	#include <asm/ldt.h>
f55f0501	27	#include <asm/tlb.h>
1da177e4	28	#include <asm/desc.h>
70f5088d	29	#include <asm/mmu_context.h>
186525bd IM	30	#include <asm/pgtable_areas.h>
186525bd IM	31
cc801833 AL	32	#include <xen/xen.h>
cc801833 AL	33
186525bd IM	34	/* This is a multiple of PAGE_SIZE. */
	35	#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
	36
	37	static inline void *ldt_slot_va(int slot)
	38	{
	39	return (void )(LDT_BASE_ADDR + LDT_SLOT_STRIDE slot);
	40	}
	41
	42	void load_mm_ldt(struct mm_struct *mm)
	43	{
	44	struct ldt_struct *ldt;
	45
	46	/* READ_ONCE synchronizes with smp_store_release */
	47	ldt = READ_ONCE(mm->context.ldt);
	48
	49	/*
	50	* Any change to mm->context.ldt is followed by an IPI to all
	51	* CPUs with the mm active. The LDT will not be freed until
	52	* after the IPI is handled by all such CPUs. This means that,
	53	* if the ldt_struct changes before we return, the values we see
	54	* will be safe, and the new values will be loaded before we run
	55	* any user code.
	56	*
	57	* NB: don't try to convert this to use RCU without extreme care.
	58	* We would still need IRQs off, because we don't want to change
	59	* the local LDT after an IPI loaded a newer value than the one
	60	* that we can see.
	61	*/
	62
	63	if (unlikely(ldt)) {
	64	if (static_cpu_has(X86_FEATURE_PTI)) {
	65	if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
	66	/*
	67	* Whoops -- either the new LDT isn't mapped
	68	* (if slot == -1) or is mapped into a bogus
	69	* slot (if slot > 1).
	70	*/
	71	clear_LDT();
	72	return;
	73	}
	74
	75	/*
	76	* If page table isolation is enabled, ldt->entries
	77	* will not be mapped in the userspace pagetables.
	78	* Tell the CPU to access the LDT through the alias
	79	* at ldt_slot_va(ldt->slot).
	80	*/
	81	set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
	82	} else {
	83	set_ldt(ldt->entries, ldt->nr_entries);
	84	}
	85	} else {
	86	clear_LDT();
	87	}
	88	}
	89
	90	void switch_ldt(struct mm_struct prev, struct mm_struct next)
	91	{
	92	/*
	93	* Load the LDT if either the old or new mm had an LDT.
	94	*
	95	* An mm will never go from having an LDT to not having an LDT. Two
	96	* mms never share an LDT, so we don't gain anything by checking to
	97	* see whether the LDT changed. There's also no guarantee that
98	* prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
99	* then prev->context.ldt will also be non-NULL.
100	*
101	* If we really cared, we could optimize the case where prev == next
102	* and we're exiting lazy mode. Most of the time, if this happens,
103	* we don't actually need to reload LDTR, but modify_ldt() is mostly
104	* used by legacy code and emulators where we don't need this level of
105	* performance.
106	*
107	* This uses \| instead of \|\| because it generates better code.
108	*/
109	if (unlikely((unsigned long)prev->context.ldt \|
110	(unsigned long)next->context.ldt))
111	load_mm_ldt(next);
112
113	DEBUG_LOCKS_WARN_ON(preemptible());
114	}
1da177e4	115
a6323757 AL	116	static void refresh_ldt_segments(void)
	117	{
	118	#ifdef CONFIG_X86_64
	119	unsigned short sel;
	120
	121	/*
	122	* Make sure that the cached DS and ES descriptors match the updated
	123	* LDT.
	124	*/
	125	savesegment(ds, sel);
	126	if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
	127	loadsegment(ds, sel);
	128
	129	savesegment(es, sel);
	130	if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
	131	loadsegment(es, sel);
	132	#endif
	133	}
	134
c2b3496b	135	/* context.lock is held by the task which issued the smp function call */
3d28ebce	136	static void flush_ldt(void *__mm)
1da177e4	137	{
3d28ebce	138	struct mm_struct *mm = __mm;
37868fe1	139
3d28ebce	140	if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
37868fe1 AL	141	return;
37868fe1 AL	142
f55f0501	143	load_mm_ldt(mm);
a6323757 AL	144
a6323757 AL	145	refresh_ldt_segments();
1da177e4	146	}
1da177e4	147
37868fe1	148	/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
bbf79d21	149	static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
1da177e4	150	{
37868fe1	151	struct ldt_struct *new_ldt;
990e9dc3	152	unsigned int alloc_size;
37868fe1	153
bbf79d21	154	if (num_entries > LDT_ENTRIES)
37868fe1 AL	155	return NULL;
	156
	157	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
	158	if (!new_ldt)
	159	return NULL;
	160
	161	BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
bbf79d21	162	alloc_size = num_entries * LDT_ENTRY_SIZE;
37868fe1 AL	163
	164	/*
	165	* Xen is very picky: it requires a page-aligned LDT that has no
	166	* trailing nonzero bytes in any page that contains LDT descriptors.
	167	* Keep it simple: zero the whole allocation and never allocate less
	168	* than PAGE_SIZE.
	169	*/
	170	if (alloc_size > PAGE_SIZE)
	171	new_ldt->entries = vzalloc(alloc_size);
1da177e4	172	else
f454b478	173	new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL);
1da177e4	174
37868fe1 AL	175	if (!new_ldt->entries) {
	176	kfree(new_ldt);
	177	return NULL;
	178	}
77e463d1	179
f55f0501 AL	180	/* The new LDT isn't aliased for PTI yet. */
	181	new_ldt->slot = -1;
	182
bbf79d21	183	new_ldt->nr_entries = num_entries;
37868fe1 AL	184	return new_ldt;
37868fe1 AL	185	}
38ffbe66	186
9bae3197 JR	187	#ifdef CONFIG_PAGE_TABLE_ISOLATION
	188
	189	static void do_sanity_check(struct mm_struct *mm,
	190	bool had_kernel_mapping,
	191	bool had_user_mapping)
	192	{
	193	if (mm->context.ldt) {
	194	/*
	195	* We already had an LDT. The top-level entry should already
	196	* have been allocated and synchronized with the usermode
	197	* tables.
	198	*/
	199	WARN_ON(!had_kernel_mapping);
67e87d43	200	if (boot_cpu_has(X86_FEATURE_PTI))
9bae3197 JR	201	WARN_ON(!had_user_mapping);
	202	} else {
	203	/*
	204	* This is the first time we're mapping an LDT for this process.
	205	* Sync the pgd to the usermode tables.
	206	*/
	207	WARN_ON(had_kernel_mapping);
67e87d43	208	if (boot_cpu_has(X86_FEATURE_PTI))
9bae3197 JR	209	WARN_ON(had_user_mapping);
	210	}
	211	}
	212
6df934b9 JR	213	#ifdef CONFIG_X86_PAE
	214
	215	static pmd_t pgd_to_pmd_walk(pgd_t pgd, unsigned long va)
	216	{
	217	p4d_t *p4d;
	218	pud_t *pud;
	219
	220	if (pgd->pgd == 0)
	221	return NULL;
	222
	223	p4d = p4d_offset(pgd, va);
	224	if (p4d_none(*p4d))
	225	return NULL;
	226
	227	pud = pud_offset(p4d, va);
	228	if (pud_none(*pud))
	229	return NULL;
	230
	231	return pmd_offset(pud, va);
	232	}
	233
	234	static void map_ldt_struct_to_user(struct mm_struct *mm)
	235	{
	236	pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
	237	pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
	238	pmd_t k_pmd, u_pmd;
	239
	240	k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
	241	u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
	242
67e87d43	243	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
6df934b9 JR	244	set_pmd(u_pmd, *k_pmd);
	245	}
	246
	247	static void sanity_check_ldt_mapping(struct mm_struct *mm)
	248	{
	249	pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
	250	pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
	251	bool had_kernel, had_user;
	252	pmd_t k_pmd, u_pmd;
	253
	254	k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
	255	u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
	256	had_kernel = (k_pmd->pmd != 0);
	257	had_user = (u_pmd->pmd != 0);
	258
	259	do_sanity_check(mm, had_kernel, had_user);
	260	}
	261
	262	#else /* !CONFIG_X86_PAE */
	263
9bae3197 JR	264	static void map_ldt_struct_to_user(struct mm_struct *mm)
	265	{
	266	pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
	267
67e87d43	268	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
9bae3197 JR	269	set_pgd(kernel_to_user_pgdp(pgd), *pgd);
	270	}
	271
	272	static void sanity_check_ldt_mapping(struct mm_struct *mm)
	273	{
	274	pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
	275	bool had_kernel = (pgd->pgd != 0);
	276	bool had_user = (kernel_to_user_pgdp(pgd)->pgd != 0);
	277
	278	do_sanity_check(mm, had_kernel, had_user);
	279	}
	280
6df934b9 JR	281	#endif /* CONFIG_X86_PAE */
6df934b9 JR	282
f55f0501 AL	283	/*
	284	* If PTI is enabled, this maps the LDT into the kernelmode and
	285	* usermode tables for the given mm.
f55f0501 AL	286	*/
	287	static int
	288	map_ldt_struct(struct mm_struct mm, struct ldt_struct ldt, int slot)
	289	{
f55f0501	290	unsigned long va;
9bae3197	291	bool is_vmalloc;
f55f0501	292	spinlock_t *ptl;
a0e6e083	293	int i, nr_pages;
f55f0501	294
67e87d43	295	if (!boot_cpu_has(X86_FEATURE_PTI))
f55f0501 AL	296	return 0;
	297
	298	/*
	299	* Any given ldt_struct should have map_ldt_struct() called at most
	300	* once.
	301	*/
	302	WARN_ON(ldt->slot != -1);
	303
9bae3197 JR	304	/* Check if the current mappings are sane */
	305	sanity_check_ldt_mapping(mm);
	306
f55f0501 AL	307	is_vmalloc = is_vmalloc_addr(ldt->entries);
f55f0501 AL	308
a0e6e083 KS	309	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
	310
	311	for (i = 0; i < nr_pages; i++) {
f55f0501 AL	312	unsigned long offset = i << PAGE_SHIFT;
	313	const void src = (char )ldt->entries + offset;
	314	unsigned long pfn;
fb43d6cb	315	pgprot_t pte_prot;
f55f0501 AL	316	pte_t pte, *ptep;
	317
	318	va = (unsigned long)ldt_slot_va(slot) + offset;
	319	pfn = is_vmalloc ? vmalloc_to_pfn(src) :
	320	page_to_pfn(virt_to_page(src));
	321	/*
	322	* Treat the PTI LDT range as a userspace range.
	323	* get_locked_pte() will allocate all needed pagetables
	324	* and account for them in this mm.
	325	*/
	326	ptep = get_locked_pte(mm, va, &ptl);
	327	if (!ptep)
	328	return -ENOMEM;
9f5cb6b3 TG	329	/*
	330	* Map it RO so the easy to find address is not a primary
	331	* target via some kernel interface which misses a
	332	* permission check.
	333	*/
fb43d6cb DH	334	pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
fb43d6cb DH	335	/* Filter out unsuppored __PAGE_KERNEL* bits: */
e6f39e87	336	pgprot_val(pte_prot) &= __supported_pte_mask;
fb43d6cb	337	pte = pfn_pte(pfn, pte_prot);
f55f0501 AL	338	set_pte_at(mm, va, ptep, pte);
	339	pte_unmap_unlock(ptep, ptl);
	340	}
	341
9bae3197 JR	342	/* Propagate LDT mapping to the user page-table */
9bae3197 JR	343	map_ldt_struct_to_user(mm);
f55f0501	344
f55f0501	345	ldt->slot = slot;
f55f0501 AL	346	return 0;
	347	}
	348
a0e6e083 KS	349	static void unmap_ldt_struct(struct mm_struct mm, struct ldt_struct ldt)
	350	{
	351	unsigned long va;
	352	int i, nr_pages;
	353
	354	if (!ldt)
	355	return;
	356
	357	/* LDT map/unmap is only required for PTI */
67e87d43	358	if (!boot_cpu_has(X86_FEATURE_PTI))
a0e6e083 KS	359	return;
	360
	361	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
	362
	363	for (i = 0; i < nr_pages; i++) {
	364	unsigned long offset = i << PAGE_SHIFT;
	365	spinlock_t *ptl;
	366	pte_t *ptep;
	367
	368	va = (unsigned long)ldt_slot_va(ldt->slot) + offset;
	369	ptep = get_locked_pte(mm, va, &ptl);
	370	pte_clear(mm, va, ptep);
	371	pte_unmap_unlock(ptep, ptl);
	372	}
	373
	374	va = (unsigned long)ldt_slot_va(ldt->slot);
	375	flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
	376	}
	377
9bae3197 JR	378	#else /* !CONFIG_PAGE_TABLE_ISOLATION */
	379
	380	static int
	381	map_ldt_struct(struct mm_struct mm, struct ldt_struct ldt, int slot)
	382	{
	383	return 0;
	384	}
a0e6e083 KS	385
	386	static void unmap_ldt_struct(struct mm_struct mm, struct ldt_struct ldt)
	387	{
	388	}
9bae3197 JR	389	#endif /* CONFIG_PAGE_TABLE_ISOLATION */
9bae3197 JR	390
f55f0501 AL	391	static void free_ldt_pgtables(struct mm_struct *mm)
	392	{
	393	#ifdef CONFIG_PAGE_TABLE_ISOLATION
	394	struct mmu_gather tlb;
	395	unsigned long start = LDT_BASE_ADDR;
8195d869	396	unsigned long end = LDT_END_ADDR;
f55f0501	397
67e87d43	398	if (!boot_cpu_has(X86_FEATURE_PTI))
f55f0501 AL	399	return;
	400
	401	tlb_gather_mmu(&tlb, mm, start, end);
	402	free_pgd_range(&tlb, start, end, start, end);
	403	tlb_finish_mmu(&tlb, start, end);
	404	#endif
	405	}
	406
37868fe1 AL	407	/* After calling this, the LDT is immutable. */
	408	static void finalize_ldt_struct(struct ldt_struct *ldt)
	409	{
bbf79d21	410	paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
1da177e4 LT	411	}
1da177e4 LT	412
c2b3496b	413	static void install_ldt(struct mm_struct mm, struct ldt_struct ldt)
1da177e4	414	{
c2b3496b PZ	415	mutex_lock(&mm->context.lock);
c2b3496b PZ	416
3382290e	417	/* Synchronizes with READ_ONCE in load_mm_ldt. */
c2b3496b	418	smp_store_release(&mm->context.ldt, ldt);
37868fe1	419
c2b3496b PZ	420	/* Activate the LDT for all CPUs using currents mm. */
	421	on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
	422
	423	mutex_unlock(&mm->context.lock);
37868fe1	424	}
78aa1f66	425
37868fe1 AL	426	static void free_ldt_struct(struct ldt_struct *ldt)
	427	{
	428	if (likely(!ldt))
	429	return;
38ffbe66	430
bbf79d21 BP	431	paravirt_free_ldt(ldt->entries, ldt->nr_entries);
bbf79d21 BP	432	if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
8d5341a6	433	vfree_atomic(ldt->entries);
37868fe1	434	else
f454b478	435	free_page((unsigned long)ldt->entries);
37868fe1	436	kfree(ldt);
1da177e4 LT	437	}
	438
	439	/*
a4828f81 TG	440	* Called on fork from arch_dup_mmap(). Just copy the current LDT state,
a4828f81 TG	441	* the new task is not running, so nothing can be installed.
1da177e4	442	*/
a4828f81	443	int ldt_dup_context(struct mm_struct old_mm, struct mm_struct mm)
1da177e4	444	{
37868fe1	445	struct ldt_struct *new_ldt;
1da177e4 LT	446	int retval = 0;
1da177e4 LT	447
a4828f81	448	if (!old_mm)
37868fe1	449	return 0;
37868fe1 AL	450
37868fe1 AL	451	mutex_lock(&old_mm->context.lock);
a4828f81	452	if (!old_mm->context.ldt)
37868fe1	453	goto out_unlock;
37868fe1	454
bbf79d21	455	new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
37868fe1 AL	456	if (!new_ldt) {
	457	retval = -ENOMEM;
	458	goto out_unlock;
	459	}
	460
	461	memcpy(new_ldt->entries, old_mm->context.ldt->entries,
bbf79d21	462	new_ldt->nr_entries * LDT_ENTRY_SIZE);
37868fe1 AL	463	finalize_ldt_struct(new_ldt);
37868fe1 AL	464
f55f0501 AL	465	retval = map_ldt_struct(mm, new_ldt, 0);
	466	if (retval) {
	467	free_ldt_pgtables(mm);
	468	free_ldt_struct(new_ldt);
	469	goto out_unlock;
	470	}
37868fe1 AL	471	mm->context.ldt = new_ldt;
	472
	473	out_unlock:
	474	mutex_unlock(&old_mm->context.lock);
1da177e4 LT	475	return retval;
	476	}
	477
	478	/*
77e463d1 TG	479	* No need to lock the MM as we are the last user
	480	*
	481	* 64bit: Don't touch the LDT register - we're already in the next thread.
1da177e4	482	*/
39a0526f	483	void destroy_context_ldt(struct mm_struct *mm)
1da177e4	484	{
37868fe1 AL	485	free_ldt_struct(mm->context.ldt);
37868fe1 AL	486	mm->context.ldt = NULL;
1da177e4 LT	487	}
1da177e4 LT	488
/*
 * Exit-mmap hook for the LDT code: releases the LDT page tables of @mm
 * by delegating to free_ldt_pgtables().
 */
void ldt_arch_exit_mmap(struct mm_struct *mm)
{
	free_ldt_pgtables(mm);
}
	493
78aa1f66	494	static int read_ldt(void __user *ptr, unsigned long bytecount)
1da177e4	495	{
78aa1f66	496	struct mm_struct *mm = current->mm;
bbf79d21 BP	497	unsigned long entries_size;
bbf79d21 BP	498	int retval;
1da177e4	499
c2b3496b	500	down_read(&mm->context.ldt_usr_sem);
37868fe1 AL	501
	502	if (!mm->context.ldt) {
	503	retval = 0;
	504	goto out_unlock;
	505	}
	506
78aa1f66 TG	507	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
78aa1f66 TG	508	bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
1da177e4	509
bbf79d21 BP	510	entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE;
	511	if (entries_size > bytecount)
	512	entries_size = bytecount;
1da177e4	513
bbf79d21	514	if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) {
37868fe1 AL	515	retval = -EFAULT;
	516	goto out_unlock;
	517	}
	518
bbf79d21	519	if (entries_size != bytecount) {
37868fe1	520	/* Zero-fill the rest and pretend we read bytecount bytes. */
bbf79d21	521	if (clear_user(ptr + entries_size, bytecount - entries_size)) {
37868fe1 AL	522	retval = -EFAULT;
37868fe1 AL	523	goto out_unlock;
1da177e4 LT	524	}
1da177e4 LT	525	}
37868fe1 AL	526	retval = bytecount;
	527
	528	out_unlock:
c2b3496b	529	up_read(&mm->context.ldt_usr_sem);
37868fe1	530	return retval;
1da177e4 LT	531	}
1da177e4 LT	532
78aa1f66	533	static int read_default_ldt(void __user *ptr, unsigned long bytecount)
1da177e4	534	{
77e463d1 TG	535	/* CHECKME: Can we use _one_ random number ? */
	536	#ifdef CONFIG_X86_32
	537	unsigned long size = 5 * sizeof(struct desc_struct);
	538	#else
	539	unsigned long size = 128;
	540	#endif
	541	if (bytecount > size)
	542	bytecount = size;
1da177e4 LT	543	if (clear_user(ptr, bytecount))
1da177e4 LT	544	return -EFAULT;
78aa1f66	545	return bytecount;
1da177e4 LT	546	}
1da177e4 LT	547
cc801833 AL	548	static bool allow_16bit_segments(void)
	549	{
	550	if (!IS_ENABLED(CONFIG_X86_16BIT))
	551	return false;
	552
	553	#ifdef CONFIG_XEN_PV
	554	/*
	555	* Xen PV does not implement ESPFIX64, which means that 16-bit
	556	* segments will not work correctly. Until either Xen PV implements
	557	* ESPFIX64 and can signal this fact to the guest or unless someone
	558	* provides compelling evidence that allowing broken 16-bit segments
	559	* is worthwhile, disallow 16-bit segments under Xen PV.
	560	*/
	561	if (xen_pv_domain()) {
bb5a93aa	562	pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n");
cc801833 AL	563	return false;
	564	}
	565	#endif
	566
	567	return true;
	568	}
	569
78aa1f66	570	static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
1da177e4	571	{
70f5088d	572	struct mm_struct *mm = current->mm;
990e9dc3	573	struct ldt_struct new_ldt, old_ldt;
bbf79d21	574	unsigned int old_nr_entries, new_nr_entries;
990e9dc3	575	struct user_desc ldt_info;
5af72502	576	struct desc_struct ldt;
1da177e4	577	int error;
1da177e4 LT	578
1da177e4 LT	579	error = -EINVAL;
1da177e4 LT	580	if (bytecount != sizeof(ldt_info))
1da177e4 LT	581	goto out;
78aa1f66	582	error = -EFAULT;
70f5088d	583	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
1da177e4 LT	584	goto out;
	585
	586	error = -EINVAL;
	587	if (ldt_info.entry_number >= LDT_ENTRIES)
	588	goto out;
	589	if (ldt_info.contents == 3) {
	590	if (oldmode)
	591	goto out;
	592	if (ldt_info.seg_not_present == 0)
	593	goto out;
	594	}
	595
37868fe1 AL	596	if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) \|\|
	597	LDT_empty(&ldt_info)) {
	598	/* The user wants to clear the entry. */
	599	memset(&ldt, 0, sizeof(ldt));
	600	} else {
cc801833	601	if (!ldt_info.seg_32bit && !allow_16bit_segments()) {
37868fe1 AL	602	error = -EINVAL;
37868fe1 AL	603	goto out;
1da177e4	604	}
37868fe1 AL	605
	606	fill_ldt(&ldt, &ldt_info);
	607	if (oldmode)
	608	ldt.avl = 0;
1da177e4 LT	609	}
1da177e4 LT	610
c2b3496b PZ	611	if (down_write_killable(&mm->context.ldt_usr_sem))
c2b3496b PZ	612	return -EINTR;
37868fe1	613
bbf79d21 BP	614	old_ldt = mm->context.ldt;
	615	old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
	616	new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);
37868fe1 AL	617
37868fe1 AL	618	error = -ENOMEM;
bbf79d21	619	new_ldt = alloc_ldt_struct(new_nr_entries);
37868fe1	620	if (!new_ldt)
34273f41	621	goto out_unlock;
34273f41	622
37868fe1	623	if (old_ldt)
bbf79d21 BP	624	memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);
bbf79d21 BP	625
37868fe1 AL	626	new_ldt->entries[ldt_info.entry_number] = ldt;
37868fe1 AL	627	finalize_ldt_struct(new_ldt);
1da177e4	628
f55f0501 AL	629	/*
	630	* If we are using PTI, map the new LDT into the userspace pagetables.
	631	* If there is already an LDT, use the other slot so that other CPUs
	632	* will continue to use the old LDT until install_ldt() switches
	633	* them over to the new LDT.
	634	*/
	635	error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
	636	if (error) {
a62d6985 TG	637	/*
	638	* This only can fail for the first LDT setup. If an LDT is
	639	* already installed then the PTE page is already
	640	* populated. Mop up a half populated page table.
	641	*/
7f414195 TG	642	if (!WARN_ON_ONCE(old_ldt))
7f414195 TG	643	free_ldt_pgtables(mm);
a62d6985	644	free_ldt_struct(new_ldt);
f55f0501 AL	645	goto out_unlock;
	646	}
	647
37868fe1	648	install_ldt(mm, new_ldt);
a0e6e083	649	unmap_ldt_struct(mm, old_ldt);
37868fe1	650	free_ldt_struct(old_ldt);
1da177e4 LT	651	error = 0;
	652
	653	out_unlock:
c2b3496b	654	up_write(&mm->context.ldt_usr_sem);
1da177e4 LT	655	out:
	656	return error;
	657	}
	658
da20ab35 DH	659	SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
da20ab35 DH	660	unsigned long , bytecount)
1da177e4 LT	661	{
	662	int ret = -ENOSYS;
	663
	664	switch (func) {
	665	case 0:
	666	ret = read_ldt(ptr, bytecount);
	667	break;
	668	case 1:
	669	ret = write_ldt(ptr, bytecount, 1);
	670	break;
	671	case 2:
	672	ret = read_default_ldt(ptr, bytecount);
	673	break;
	674	case 0x11:
	675	ret = write_ldt(ptr, bytecount, 0);
	676	break;
	677	}
da20ab35 DH	678	/*
	679	* The SYSCALL_DEFINE() macros give us an 'unsigned long'
	680	* return type, but tht ABI for sys_modify_ldt() expects
	681	* 'int'. This cast gives us an int-sized value in %rax
	682	* for the return code. The 'unsigned' is necessary so
	683	* the compiler does not try to sign-extend the negative
	684	* return codes into the high half of the register when
	685	* taking the value from int->long.
	686	*/
	687	return (unsigned int)ret;
1da177e4	688	}