[mirror_ubuntu-artful-kernel.git] / arch / x86 / kernel / ldt.c

/*
 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002 Andi Kleen
 *
 * This handles calls from both 32bit and 64bit mode.
 *
 * Lock order:
 *	context.ldt_usr_sem
 *	  mmap_sem
 *	    context.lock
 */

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>

#include <asm/ldt.h>
#include <asm/tlb.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
#include <asm/syscalls.h>

/*
 * Re-load DS and ES when they currently hold LDT selectors, so that the
 * cached descriptors match the updated LDT.  The body is only compiled
 * for CONFIG_X86_64; otherwise this function does nothing.
 */
static void refresh_ldt_segments(void)
{
#ifdef CONFIG_X86_64
	unsigned short ds_sel, es_sel;

	/* DS: reload only if the selector points into the LDT. */
	savesegment(ds, ds_sel);
	if ((ds_sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
		loadsegment(ds, ds_sel);

	/* ES: same treatment. */
	savesegment(es, es_sel);
	if ((es_sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
		loadsegment(es, es_sel);
#endif
}

/*
 * SMP function-call callback (see install_ldt()): reload the LDT on this
 * CPU, but only if the CPU currently has @__mm loaded.
 *
 * context.lock is held by the task which issued the smp function call.
 */
static void flush_ldt(void *__mm)
{
	struct mm_struct *target_mm = __mm;

	/* A CPU that has a different mm loaded has nothing to refresh. */
	if (this_cpu_read(cpu_tlbstate.loaded_mm) == target_mm) {
		load_mm_ldt(target_mm);
		refresh_ldt_segments();
	}
}

/*
 * Allocate an ldt_struct with room for @num_entries descriptors.
 *
 * Returns NULL when @num_entries exceeds LDT_ENTRIES or when either
 * allocation fails.  The descriptor table starts zeroed and the slot is
 * set to -1.  The caller must call finalize_ldt_struct() on the result.
 */
static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
{
	struct ldt_struct *ldt;
	unsigned int table_bytes;
	void *table;

	BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));

	if (num_entries > LDT_ENTRIES)
		return NULL;

	ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
	if (!ldt)
		return NULL;

	table_bytes = num_entries * LDT_ENTRY_SIZE;

	/*
	 * Xen is very picky: it requires a page-aligned LDT that has no
	 * trailing nonzero bytes in any page that contains LDT descriptors.
	 * Keep it simple: hand out zeroed memory only, and never less than
	 * a full page.
	 */
	if (table_bytes <= PAGE_SIZE)
		table = (void *)get_zeroed_page(GFP_KERNEL);
	else
		table = vzalloc(table_bytes);

	if (!table) {
		kfree(ldt);
		return NULL;
	}

	ldt->entries = table;
	ldt->nr_entries = num_entries;

	/* The new LDT isn't aliased for PTI yet. */
	ldt->slot = -1;

	return ldt;
}

/*
 * If PTI is enabled, this maps the LDT into the kernelmode and
 * usermode tables for the given mm.
 *
 * @mm:   address space whose page tables receive the mapping
 * @ldt:  LDT to map; its slot must still be -1 (i.e. not mapped before)
 * @slot: LDT slot whose virtual address range (ldt_slot_va()) is used
 *
 * Returns 0 on success, or when PTI is compiled out or not enabled on
 * this CPU.  Returns -ENOMEM if get_locked_pte() fails; in that case PTEs
 * installed by earlier loop iterations are left in place and @ldt->slot
 * is not updated, so the caller is responsible for cleaning up (the
 * callers in this file use free_ldt_pgtables()).
 *
 * There is no corresponding unmap function.  Even if the LDT is freed, we
 * leave the PTEs around until the slot is reused or the mm is destroyed.
 * This is harmless: the LDT is always in ordinary memory, and no one will
 * access the freed slot.
 *
 * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
 * it useful, and the flush would slow down modify_ldt().
 */
static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	bool is_vmalloc, had_top_level_entry;
	unsigned long va;
	spinlock_t *ptl;
	pgd_t *pgd;
	int i;

	if (!static_cpu_has(X86_FEATURE_PTI))
		return 0;

	/*
	 * Any given ldt_struct should have map_ldt_struct() called at most
	 * once.
	 */
	WARN_ON(ldt->slot != -1);

	/*
	 * Did we already have the top level entry allocated?  We can't
	 * use pgd_none() for this because it doesn't do anything on
	 * 4-level page table kernels.
	 */
	pgd = pgd_offset(mm, LDT_BASE_ADDR);
	had_top_level_entry = (pgd->pgd != 0);

	/* Decides below how each backing page is translated to a pfn. */
	is_vmalloc = is_vmalloc_addr(ldt->entries);

	/* Install one PTE per page that holds LDT descriptors. */
	for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
		unsigned long offset = i << PAGE_SHIFT;
		const void *src = (char *)ldt->entries + offset;
		unsigned long pfn;
		pte_t pte, *ptep;

		va = (unsigned long)ldt_slot_va(slot) + offset;
		pfn = is_vmalloc ? vmalloc_to_pfn(src) :
			page_to_pfn(virt_to_page(src));
		/*
		 * Treat the PTI LDT range as a *userspace* range.
		 * get_locked_pte() will allocate all needed pagetables
		 * and account for them in this mm.
		 */
		ptep = get_locked_pte(mm, va, &ptl);
		if (!ptep)
			return -ENOMEM;
		/*
		 * Map it RO so the easy to find address is not a primary
		 * target via some kernel interface which misses a
		 * permission check.
		 */
		pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
		set_pte_at(mm, va, ptep, pte);
		pte_unmap_unlock(ptep, ptl);
	}

	if (mm->context.ldt) {
		/*
		 * We already had an LDT.  The top-level entry should already
		 * have been allocated and synchronized with the usermode
		 * tables.
		 */
		WARN_ON(!had_top_level_entry);
		if (static_cpu_has(X86_FEATURE_PTI))
			WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
	} else {
		/*
		 * This is the first time we're mapping an LDT for this process.
		 * Sync the pgd to the usermode tables.
		 */
		WARN_ON(had_top_level_entry);
		if (static_cpu_has(X86_FEATURE_PTI)) {
			WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
			set_pgd(kernel_to_user_pgdp(pgd), *pgd);
		}
	}

	/* Flush the whole slot range, not just the pages mapped above. */
	va = (unsigned long)ldt_slot_va(slot);
	flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);

	/* Record the slot only once the mapping is fully in place. */
	ldt->slot = slot;
#endif
	return 0;
}

/*
 * Tear down the page tables covering the PTI LDT range of @mm, i.e. the
 * PGDIR-sized region that starts at LDT_BASE_ADDR.  Does nothing unless
 * CONFIG_PAGE_TABLE_ISOLATION is built in and the CPU has X86_FEATURE_PTI.
 */
static void free_ldt_pgtables(struct mm_struct *mm)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	const unsigned long ldt_start = LDT_BASE_ADDR;
	const unsigned long ldt_end = ldt_start + (1UL << PGDIR_SHIFT);
	struct mmu_gather tlb;

	if (static_cpu_has(X86_FEATURE_PTI)) {
		tlb_gather_mmu(&tlb, mm, ldt_start, ldt_end);
		free_pgd_range(&tlb, ldt_start, ldt_end, ldt_start, ldt_end);
		tlb_finish_mmu(&tlb, ldt_start, ldt_end);
	}
#endif
}

/*
 * Hand the finished descriptor table (@ldt->entries, @ldt->nr_entries) to
 * the paravirt_alloc_ldt() hook.
 *
 * After calling this, the LDT is immutable.
 */
static void finalize_ldt_struct(struct ldt_struct *ldt)
{
	paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
}

/*
 * Publish @ldt as the LDT of @mm and make every CPU in mm_cpumask(@mm)
 * pick it up by running flush_ldt() there.  Both steps happen with
 * mm->context.lock held; the final on_each_cpu_mask() argument (wait) is
 * true.
 */
static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
{
	mutex_lock(&mm->context.lock);

	/* Synchronizes with READ_ONCE in load_mm_ldt. */
	smp_store_release(&mm->context.ldt, ldt);

	/* Activate the LDT for all CPUs using this mm. */
	on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);

	mutex_unlock(&mm->context.lock);
}

/*
 * Release @ldt together with its descriptor table.  A NULL @ldt is
 * accepted and ignored.  The table is freed with the counterpart of the
 * allocator alloc_ldt_struct() picks for its size: a single page up to
 * PAGE_SIZE, vmalloc memory above that.
 */
static void free_ldt_struct(struct ldt_struct *ldt)
{
	unsigned int table_bytes;

	if (likely(!ldt))
		return;

	paravirt_free_ldt(ldt->entries, ldt->nr_entries);

	table_bytes = ldt->nr_entries * LDT_ENTRY_SIZE;
	if (table_bytes <= PAGE_SIZE)
		free_page((unsigned long)ldt->entries);
	else
		vfree_atomic(ldt->entries);

	kfree(ldt);
}

/*
 * Called on fork from arch_dup_mmap().  Gives @mm a private copy of
 * @old_mm's LDT; the new task is not running yet, so nothing can be
 * installed concurrently on @mm.
 *
 * Returns 0 when there is nothing to copy (@old_mm is NULL or has no LDT)
 * or on success, -ENOMEM when the allocation fails, or the error reported
 * by map_ldt_struct().
 */
int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
{
	struct ldt_struct *src_ldt, *copy;
	int retval = 0;

	if (!old_mm)
		return 0;

	mutex_lock(&old_mm->context.lock);

	src_ldt = old_mm->context.ldt;
	if (!src_ldt)
		goto out_unlock;

	copy = alloc_ldt_struct(src_ldt->nr_entries);
	if (!copy) {
		retval = -ENOMEM;
		goto out_unlock;
	}

	memcpy(copy->entries, src_ldt->entries,
	       copy->nr_entries * LDT_ENTRY_SIZE);
	finalize_ldt_struct(copy);

	retval = map_ldt_struct(mm, copy, 0);
	if (retval) {
		/* Mop up whatever part of the mapping was set up. */
		free_ldt_pgtables(mm);
		free_ldt_struct(copy);
	} else {
		mm->context.ldt = copy;
	}

out_unlock:
	mutex_unlock(&old_mm->context.lock);
	return retval;
}

/*
 * Free the LDT attached to @mm (if any) and clear the pointer.
 *
 * No need to lock the MM as we are the last user
 *
 * 64bit: Don't touch the LDT register - we're already in the next thread.
 */
void destroy_context_ldt(struct mm_struct *mm)
{
	/* free_ldt_struct() ignores a NULL pointer. */
	free_ldt_struct(mm->context.ldt);
	mm->context.ldt = NULL;
}

/*
 * Free the PTI LDT page tables of @mm via free_ldt_pgtables().
 * NOTE(review): presumably invoked from the arch exit_mmap hook - confirm
 * against the caller.
 */
void ldt_arch_exit_mmap(struct mm_struct *mm)
{
	free_ldt_pgtables(mm);
}

/*
 * Copy the current task's LDT to the user buffer @ptr.
 *
 * @bytecount is clamped to the size of a full LDT
 * (LDT_ENTRY_SIZE * LDT_ENTRIES).  Whatever part of the clamped request
 * lies beyond the existing entries is zero-filled.
 *
 * Returns 0 if the mm has no LDT, the clamped @bytecount on success, or
 * -EFAULT if the user buffer cannot be written.
 */
static int read_ldt(void __user *ptr, unsigned long bytecount)
{
	struct mm_struct *mm = current->mm;
	struct ldt_struct *ldt;
	unsigned long copy_len;
	int retval = 0;

	down_read(&mm->context.ldt_usr_sem);

	ldt = mm->context.ldt;
	if (!ldt)
		goto out_unlock;

	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;

	copy_len = ldt->nr_entries * LDT_ENTRY_SIZE;
	if (copy_len > bytecount)
		copy_len = bytecount;

	/* Every failure from here on is a fault on the user buffer. */
	retval = -EFAULT;
	if (copy_to_user(ptr, ldt->entries, copy_len))
		goto out_unlock;

	/* Zero-fill the rest and pretend we read bytecount bytes. */
	if (copy_len != bytecount &&
	    clear_user(ptr + copy_len, bytecount - copy_len))
		goto out_unlock;

	retval = bytecount;

out_unlock:
	up_read(&mm->context.ldt_usr_sem);
	return retval;
}

/*
 * Zero up to @bytecount bytes of the user buffer @ptr, capped at a fixed
 * size (five descriptors on CONFIG_X86_32, 128 bytes otherwise).
 *
 * Returns the number of bytes cleared, or -EFAULT if the user buffer
 * cannot be written.
 */
static int read_default_ldt(void __user *ptr, unsigned long bytecount)
{
	/* CHECKME: Can we use _one_ random number ? */
#ifdef CONFIG_X86_32
	const unsigned long limit = 5 * sizeof(struct desc_struct);
#else
	const unsigned long limit = 128;
#endif
	unsigned long len = bytecount;

	if (len > limit)
		len = limit;

	if (clear_user(ptr, len))
		return -EFAULT;

	return len;
}

/*
 * Set or clear one LDT entry of the current task from a struct user_desc
 * supplied by userspace.
 *
 * @ptr:       user pointer to a struct user_desc
 * @bytecount: must equal sizeof(struct user_desc)
 * @oldmode:   non-zero for the legacy write call (modify_ldt func 1),
 *             zero for func 0x11
 *
 * The LDT is never modified in place: a new table is allocated, filled
 * from the old one plus the changed entry, mapped, installed, and only
 * then is the old table freed.
 *
 * Returns 0 on success, -EINVAL for a malformed request, -EFAULT if the
 * descriptor cannot be read from userspace, -EINTR if waiting for
 * ldt_usr_sem was interrupted, -ENOMEM if the new table cannot be
 * allocated, or the error returned by map_ldt_struct().
 */
static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
{
	struct mm_struct *mm = current->mm;
	struct ldt_struct *new_ldt, *old_ldt;
	unsigned int old_nr_entries, new_nr_entries;
	struct user_desc ldt_info;
	struct desc_struct ldt;
	int error;

	error = -EINVAL;
	if (bytecount != sizeof(ldt_info))
		goto out;
	error = -EFAULT;
	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
		goto out;

	error = -EINVAL;
	if (ldt_info.entry_number >= LDT_ENTRIES)
		goto out;
	/* contents == 3 is rejected in oldmode and for present segments. */
	if (ldt_info.contents == 3) {
		if (oldmode)
			goto out;
		if (ldt_info.seg_not_present == 0)
			goto out;
	}

	if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
	    LDT_empty(&ldt_info)) {
		/* The user wants to clear the entry. */
		memset(&ldt, 0, sizeof(ldt));
	} else {
		/* Non-32-bit segments need CONFIG_X86_16BIT. */
		if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
			error = -EINVAL;
			goto out;
		}

		fill_ldt(&ldt, &ldt_info);
		if (oldmode)
			ldt.avl = 0;
	}

	/* Nothing is held yet, so a direct return is fine here. */
	if (down_write_killable(&mm->context.ldt_usr_sem))
		return -EINTR;

	old_ldt       = mm->context.ldt;
	old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
	/* The new table never shrinks below the old one. */
	new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);

	error = -ENOMEM;
	new_ldt = alloc_ldt_struct(new_nr_entries);
	if (!new_ldt)
		goto out_unlock;

	/* Start from the old contents; any extra entries are already zero. */
	if (old_ldt)
		memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);

	new_ldt->entries[ldt_info.entry_number] = ldt;
	finalize_ldt_struct(new_ldt);

	/*
	 * If we are using PTI, map the new LDT into the userspace pagetables.
	 * If there is already an LDT, use the other slot so that other CPUs
	 * will continue to use the old LDT until install_ldt() switches
	 * them over to the new LDT.
	 */
	error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
	if (error) {
		/*
		 * This only can fail for the first LDT setup. If an LDT is
		 * already installed then the PTE page is already
		 * populated. Mop up a half populated page table.
		 */
		if (!WARN_ON_ONCE(old_ldt))
			free_ldt_pgtables(mm);
		free_ldt_struct(new_ldt);
		goto out_unlock;
	}

	/* Switch all CPUs over before the old table is released. */
	install_ldt(mm, new_ldt);
	free_ldt_struct(old_ldt);
	error = 0;

out_unlock:
	up_write(&mm->context.ldt_usr_sem);
out:
	return error;
}

/*
 * modify_ldt() system call: dispatch on @func.
 *
 *   0    - read_ldt()
 *   1    - write_ldt() in old mode
 *   2    - read_default_ldt()
 *   0x11 - write_ldt() in new mode
 *
 * Any other @func yields -ENOSYS.
 */
SYSCALL_DEFINE3(modify_ldt, int, func, void __user *, ptr,
		unsigned long, bytecount)
{
	int ret;

	switch (func) {
	case 0:
		ret = read_ldt(ptr, bytecount);
		break;
	case 1:
		ret = write_ldt(ptr, bytecount, 1);
		break;
	case 2:
		ret = read_default_ldt(ptr, bytecount);
		break;
	case 0x11:
		ret = write_ldt(ptr, bytecount, 0);
		break;
	default:
		ret = -ENOSYS;
		break;
	}

	/*
	 * The SYSCALL_DEFINE() macros give us an 'unsigned long'
	 * return type, but the ABI for sys_modify_ldt() expects
	 * 'int'.  This cast gives us an int-sized value in %rax
	 * for the return code.  The 'unsigned' is necessary so
	 * the compiler does not try to sign-extend the negative
	 * return codes into the high half of the register when
	 * taking the value from int->long.
	 */
	return (unsigned int)ret;
}
Commit	Line	Data
1da177e4	1	/*
1da177e4 LT	2	* Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
	3	* Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
	4	* Copyright (C) 2002 Andi Kleen
78aa1f66	5	*
1da177e4	6	* This handles calls from both 32bit and 64bit mode.
bf7ee649 PZ	7	*
	8	* Lock order:
	9	* contex.ldt_usr_sem
	10	* mmap_sem
	11	* context.lock
1da177e4 LT	12	*/
	13
	14	#include <linux/errno.h>
5a0e3ad6	15	#include <linux/gfp.h>
1da177e4 LT	16	#include <linux/sched.h>
	17	#include <linux/string.h>
	18	#include <linux/mm.h>
	19	#include <linux/smp.h>
d865f635	20	#include <linux/syscalls.h>
37868fe1	21	#include <linux/slab.h>
1da177e4	22	#include <linux/vmalloc.h>
423a5405	23	#include <linux/uaccess.h>
1da177e4	24
1da177e4	25	#include <asm/ldt.h>
c2506438	26	#include <asm/tlb.h>
1da177e4	27	#include <asm/desc.h>
70f5088d	28	#include <asm/mmu_context.h>
bbc1f698	29	#include <asm/syscalls.h>
1da177e4	30
295cb0b0 AL	31	static void refresh_ldt_segments(void)
	32	{
	33	#ifdef CONFIG_X86_64
	34	unsigned short sel;
	35
	36	/*
	37	* Make sure that the cached DS and ES descriptors match the updated
	38	* LDT.
	39	*/
	40	savesegment(ds, sel);
	41	if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
	42	loadsegment(ds, sel);
	43
	44	savesegment(es, sel);
	45	if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
	46	loadsegment(es, sel);
	47	#endif
	48	}
	49
bf7ee649	50	/* context.lock is held by the task which issued the smp function call */
3d28ebce	51	static void flush_ldt(void *__mm)
1da177e4	52	{
3d28ebce	53	struct mm_struct *mm = __mm;
37868fe1	54
3d28ebce	55	if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
37868fe1 AL	56	return;
37868fe1 AL	57
c2506438	58	load_mm_ldt(mm);
295cb0b0 AL	59
295cb0b0 AL	60	refresh_ldt_segments();
1da177e4	61	}
1da177e4	62
37868fe1	63	/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
bbf79d21	64	static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
1da177e4	65	{
37868fe1	66	struct ldt_struct *new_ldt;
990e9dc3	67	unsigned int alloc_size;
37868fe1	68
bbf79d21	69	if (num_entries > LDT_ENTRIES)
37868fe1 AL	70	return NULL;
	71
	72	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
	73	if (!new_ldt)
	74	return NULL;
	75
	76	BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
bbf79d21	77	alloc_size = num_entries * LDT_ENTRY_SIZE;
37868fe1 AL	78
	79	/*
	80	* Xen is very picky: it requires a page-aligned LDT that has no
	81	* trailing nonzero bytes in any page that contains LDT descriptors.
	82	* Keep it simple: zero the whole allocation and never allocate less
	83	* than PAGE_SIZE.
	84	*/
	85	if (alloc_size > PAGE_SIZE)
	86	new_ldt->entries = vzalloc(alloc_size);
1da177e4	87	else
f454b478	88	new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL);
1da177e4	89
37868fe1 AL	90	if (!new_ldt->entries) {
	91	kfree(new_ldt);
	92	return NULL;
	93	}
77e463d1	94
c2506438 AL	95	/* The new LDT isn't aliased for PTI yet. */
	96	new_ldt->slot = -1;
	97
bbf79d21	98	new_ldt->nr_entries = num_entries;
37868fe1 AL	99	return new_ldt;
37868fe1 AL	100	}
38ffbe66	101
c2506438 AL	102	/*
	103	* If PTI is enabled, this maps the LDT into the kernelmode and
	104	* usermode tables for the given mm.
	105	*
	106	* There is no corresponding unmap function. Even if the LDT is freed, we
	107	* leave the PTEs around until the slot is reused or the mm is destroyed.
	108	* This is harmless: the LDT is always in ordinary memory, and no one will
	109	* access the freed slot.
	110	*
	111	* If we wanted to unmap freed LDTs, we'd also need to do a flush to make
	112	* it useful, and the flush would slow down modify_ldt().
	113	*/
	114	static int
	115	map_ldt_struct(struct mm_struct mm, struct ldt_struct ldt, int slot)
	116	{
	117	#ifdef CONFIG_PAGE_TABLE_ISOLATION
	118	bool is_vmalloc, had_top_level_entry;
	119	unsigned long va;
	120	spinlock_t *ptl;
	121	pgd_t *pgd;
	122	int i;
	123
	124	if (!static_cpu_has(X86_FEATURE_PTI))
	125	return 0;
	126
	127	/*
	128	* Any given ldt_struct should have map_ldt_struct() called at most
	129	* once.
	130	*/
	131	WARN_ON(ldt->slot != -1);
	132
	133	/*
	134	* Did we already have the top level entry allocated? We can't
	135	* use pgd_none() for this because it doens't do anything on
	136	* 4-level page table kernels.
	137	*/
	138	pgd = pgd_offset(mm, LDT_BASE_ADDR);
	139	had_top_level_entry = (pgd->pgd != 0);
	140
	141	is_vmalloc = is_vmalloc_addr(ldt->entries);
	142
	143	for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
	144	unsigned long offset = i << PAGE_SHIFT;
	145	const void src = (char )ldt->entries + offset;
	146	unsigned long pfn;
	147	pte_t pte, *ptep;
	148
	149	va = (unsigned long)ldt_slot_va(slot) + offset;
	150	pfn = is_vmalloc ? vmalloc_to_pfn(src) :
	151	page_to_pfn(virt_to_page(src));
	152	/*
	153	* Treat the PTI LDT range as a userspace range.
	154	* get_locked_pte() will allocate all needed pagetables
	155	* and account for them in this mm.
	156	*/
	157	ptep = get_locked_pte(mm, va, &ptl);
	158	if (!ptep)
	159	return -ENOMEM;
f4b13d6f TG	160	/*
	161	* Map it RO so the easy to find address is not a primary
	162	* target via some kernel interface which misses a
	163	* permission check.
	164	*/
	165	pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
c2506438 AL	166	set_pte_at(mm, va, ptep, pte);
	167	pte_unmap_unlock(ptep, ptl);
	168	}
	169
	170	if (mm->context.ldt) {
	171	/*
	172	* We already had an LDT. The top-level entry should already
	173	* have been allocated and synchronized with the usermode
	174	* tables.
	175	*/
	176	WARN_ON(!had_top_level_entry);
	177	if (static_cpu_has(X86_FEATURE_PTI))
	178	WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
	179	} else {
	180	/*
	181	* This is the first time we're mapping an LDT for this process.
	182	* Sync the pgd to the usermode tables.
	183	*/
	184	WARN_ON(had_top_level_entry);
	185	if (static_cpu_has(X86_FEATURE_PTI)) {
	186	WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
	187	set_pgd(kernel_to_user_pgdp(pgd), *pgd);
	188	}
	189	}
	190
	191	va = (unsigned long)ldt_slot_va(slot);
	192	flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
	193
	194	ldt->slot = slot;
	195	#endif
	196	return 0;
	197	}
	198
	199	static void free_ldt_pgtables(struct mm_struct *mm)
	200	{
	201	#ifdef CONFIG_PAGE_TABLE_ISOLATION
	202	struct mmu_gather tlb;
	203	unsigned long start = LDT_BASE_ADDR;
	204	unsigned long end = start + (1UL << PGDIR_SHIFT);
	205
	206	if (!static_cpu_has(X86_FEATURE_PTI))
	207	return;
	208
	209	tlb_gather_mmu(&tlb, mm, start, end);
	210	free_pgd_range(&tlb, start, end, start, end);
	211	tlb_finish_mmu(&tlb, start, end);
	212	#endif
	213	}
	214
37868fe1 AL	215	/* After calling this, the LDT is immutable. */
	216	static void finalize_ldt_struct(struct ldt_struct *ldt)
	217	{
bbf79d21	218	paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
1da177e4 LT	219	}
1da177e4 LT	220
bf7ee649	221	static void install_ldt(struct mm_struct mm, struct ldt_struct ldt)
1da177e4	222	{
bf7ee649 PZ	223	mutex_lock(&mm->context.lock);
bf7ee649 PZ	224
7252704b	225	/* Synchronizes with READ_ONCE in load_mm_ldt. */
bf7ee649	226	smp_store_release(&mm->context.ldt, ldt);
37868fe1	227
bf7ee649 PZ	228	/* Activate the LDT for all CPUs using currents mm. */
	229	on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
	230
	231	mutex_unlock(&mm->context.lock);
37868fe1	232	}
78aa1f66	233
37868fe1 AL	234	static void free_ldt_struct(struct ldt_struct *ldt)
	235	{
	236	if (likely(!ldt))
	237	return;
38ffbe66	238
bbf79d21 BP	239	paravirt_free_ldt(ldt->entries, ldt->nr_entries);
bbf79d21 BP	240	if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
8d5341a6	241	vfree_atomic(ldt->entries);
37868fe1	242	else
f454b478	243	free_page((unsigned long)ldt->entries);
37868fe1	244	kfree(ldt);
1da177e4 LT	245	}
	246
	247	/*
f90d2542 TG	248	* Called on fork from arch_dup_mmap(). Just copy the current LDT state,
f90d2542 TG	249	* the new task is not running, so nothing can be installed.
1da177e4	250	*/
f90d2542	251	int ldt_dup_context(struct mm_struct old_mm, struct mm_struct mm)
1da177e4	252	{
37868fe1	253	struct ldt_struct *new_ldt;
1da177e4 LT	254	int retval = 0;
1da177e4 LT	255
f90d2542	256	if (!old_mm)
37868fe1	257	return 0;
37868fe1 AL	258
37868fe1 AL	259	mutex_lock(&old_mm->context.lock);
f90d2542	260	if (!old_mm->context.ldt)
37868fe1	261	goto out_unlock;
37868fe1	262
bbf79d21	263	new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
37868fe1 AL	264	if (!new_ldt) {
	265	retval = -ENOMEM;
	266	goto out_unlock;
	267	}
	268
	269	memcpy(new_ldt->entries, old_mm->context.ldt->entries,
bbf79d21	270	new_ldt->nr_entries * LDT_ENTRY_SIZE);
37868fe1 AL	271	finalize_ldt_struct(new_ldt);
37868fe1 AL	272
c2506438 AL	273	retval = map_ldt_struct(mm, new_ldt, 0);
	274	if (retval) {
	275	free_ldt_pgtables(mm);
	276	free_ldt_struct(new_ldt);
	277	goto out_unlock;
	278	}
37868fe1 AL	279	mm->context.ldt = new_ldt;
	280
	281	out_unlock:
	282	mutex_unlock(&old_mm->context.lock);
1da177e4 LT	283	return retval;
	284	}
	285
	286	/*
77e463d1 TG	287	* No need to lock the MM as we are the last user
	288	*
	289	* 64bit: Don't touch the LDT register - we're already in the next thread.
1da177e4	290	*/
39a0526f	291	void destroy_context_ldt(struct mm_struct *mm)
1da177e4	292	{
37868fe1 AL	293	free_ldt_struct(mm->context.ldt);
37868fe1 AL	294	mm->context.ldt = NULL;
1da177e4 LT	295	}
1da177e4 LT	296
c2506438 AL	297	void ldt_arch_exit_mmap(struct mm_struct *mm)
	298	{
	299	free_ldt_pgtables(mm);
	300	}
	301
78aa1f66	302	static int read_ldt(void __user *ptr, unsigned long bytecount)
1da177e4	303	{
78aa1f66	304	struct mm_struct *mm = current->mm;
bbf79d21 BP	305	unsigned long entries_size;
bbf79d21 BP	306	int retval;
1da177e4	307
bf7ee649	308	down_read(&mm->context.ldt_usr_sem);
37868fe1 AL	309
	310	if (!mm->context.ldt) {
	311	retval = 0;
	312	goto out_unlock;
	313	}
	314
78aa1f66 TG	315	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
78aa1f66 TG	316	bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
1da177e4	317
bbf79d21 BP	318	entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE;
	319	if (entries_size > bytecount)
	320	entries_size = bytecount;
1da177e4	321
bbf79d21	322	if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) {
37868fe1 AL	323	retval = -EFAULT;
	324	goto out_unlock;
	325	}
	326
bbf79d21	327	if (entries_size != bytecount) {
37868fe1	328	/* Zero-fill the rest and pretend we read bytecount bytes. */
bbf79d21	329	if (clear_user(ptr + entries_size, bytecount - entries_size)) {
37868fe1 AL	330	retval = -EFAULT;
37868fe1 AL	331	goto out_unlock;
1da177e4 LT	332	}
1da177e4 LT	333	}
37868fe1 AL	334	retval = bytecount;
	335
	336	out_unlock:
bf7ee649	337	up_read(&mm->context.ldt_usr_sem);
37868fe1	338	return retval;
1da177e4 LT	339	}
1da177e4 LT	340
78aa1f66	341	static int read_default_ldt(void __user *ptr, unsigned long bytecount)
1da177e4	342	{
77e463d1 TG	343	/* CHECKME: Can we use _one_ random number ? */
	344	#ifdef CONFIG_X86_32
	345	unsigned long size = 5 * sizeof(struct desc_struct);
	346	#else
	347	unsigned long size = 128;
	348	#endif
	349	if (bytecount > size)
	350	bytecount = size;
1da177e4 LT	351	if (clear_user(ptr, bytecount))
1da177e4 LT	352	return -EFAULT;
78aa1f66	353	return bytecount;
1da177e4 LT	354	}
1da177e4 LT	355
78aa1f66	356	static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
1da177e4	357	{
70f5088d	358	struct mm_struct *mm = current->mm;
990e9dc3	359	struct ldt_struct new_ldt, old_ldt;
bbf79d21	360	unsigned int old_nr_entries, new_nr_entries;
990e9dc3	361	struct user_desc ldt_info;
5af72502	362	struct desc_struct ldt;
1da177e4	363	int error;
1da177e4 LT	364
1da177e4 LT	365	error = -EINVAL;
1da177e4 LT	366	if (bytecount != sizeof(ldt_info))
1da177e4 LT	367	goto out;
78aa1f66	368	error = -EFAULT;
70f5088d	369	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
1da177e4 LT	370	goto out;
	371
	372	error = -EINVAL;
	373	if (ldt_info.entry_number >= LDT_ENTRIES)
	374	goto out;
	375	if (ldt_info.contents == 3) {
	376	if (oldmode)
	377	goto out;
	378	if (ldt_info.seg_not_present == 0)
	379	goto out;
	380	}
	381
37868fe1 AL	382	if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) \|\|
	383	LDT_empty(&ldt_info)) {
	384	/* The user wants to clear the entry. */
	385	memset(&ldt, 0, sizeof(ldt));
	386	} else {
	387	if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
	388	error = -EINVAL;
	389	goto out;
1da177e4	390	}
37868fe1 AL	391
	392	fill_ldt(&ldt, &ldt_info);
	393	if (oldmode)
	394	ldt.avl = 0;
1da177e4 LT	395	}
1da177e4 LT	396
bf7ee649 PZ	397	if (down_write_killable(&mm->context.ldt_usr_sem))
bf7ee649 PZ	398	return -EINTR;
37868fe1	399
bbf79d21 BP	400	old_ldt = mm->context.ldt;
	401	old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
	402	new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);
37868fe1 AL	403
37868fe1 AL	404	error = -ENOMEM;
bbf79d21	405	new_ldt = alloc_ldt_struct(new_nr_entries);
37868fe1	406	if (!new_ldt)
34273f41	407	goto out_unlock;
34273f41	408
37868fe1	409	if (old_ldt)
bbf79d21 BP	410	memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);
bbf79d21 BP	411
37868fe1 AL	412	new_ldt->entries[ldt_info.entry_number] = ldt;
37868fe1 AL	413	finalize_ldt_struct(new_ldt);
1da177e4	414
c2506438 AL	415	/*
	416	* If we are using PTI, map the new LDT into the userspace pagetables.
	417	* If there is already an LDT, use the other slot so that other CPUs
	418	* will continue to use the old LDT until install_ldt() switches
	419	* them over to the new LDT.
	420	*/
	421	error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
	422	if (error) {
03d02494 TG	423	/*
	424	* This only can fail for the first LDT setup. If an LDT is
	425	* already installed then the PTE page is already
	426	* populated. Mop up a half populated page table.
	427	*/
4e23d9d8 TG	428	if (!WARN_ON_ONCE(old_ldt))
4e23d9d8 TG	429	free_ldt_pgtables(mm);
03d02494	430	free_ldt_struct(new_ldt);
c2506438 AL	431	goto out_unlock;
	432	}
	433
37868fe1 AL	434	install_ldt(mm, new_ldt);
37868fe1 AL	435	free_ldt_struct(old_ldt);
1da177e4 LT	436	error = 0;
	437
	438	out_unlock:
bf7ee649	439	up_write(&mm->context.ldt_usr_sem);
1da177e4 LT	440	out:
	441	return error;
	442	}
	443
d865f635 DH	444	SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
d865f635 DH	445	unsigned long , bytecount)
1da177e4 LT	446	{
	447	int ret = -ENOSYS;
	448
	449	switch (func) {
	450	case 0:
	451	ret = read_ldt(ptr, bytecount);
	452	break;
	453	case 1:
	454	ret = write_ldt(ptr, bytecount, 1);
	455	break;
	456	case 2:
	457	ret = read_default_ldt(ptr, bytecount);
	458	break;
	459	case 0x11:
	460	ret = write_ldt(ptr, bytecount, 0);
	461	break;
	462	}
d865f635 DH	463	/*
	464	* The SYSCALL_DEFINE() macros give us an 'unsigned long'
	465	* return type, but tht ABI for sys_modify_ldt() expects
	466	* 'int'. This cast gives us an int-sized value in %rax
	467	* for the return code. The 'unsigned' is necessary so
	468	* the compiler does not try to sign-extend the negative
	469	* return codes into the high half of the register when
	470	* taking the value from int->long.
	471	*/
	472	return (unsigned int)ret;
1da177e4	473	}