[mirror_ubuntu-artful-kernel.git] / arch / x86 / kernel / uprobes.c

/*
 * Userspace Probes (UProbes) for x86
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2008-2011
 * Authors:
 *	Srikar Dronamraju
 *	Jim Keniston
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <linux/uprobes.h>

#include <linux/kdebug.h>
#include <asm/insn.h>

/*
 * Post-execution fixups.
 *
 * These are bit flags stored in uprobe->arch_info.fixups by the analysis
 * code in this file.
 */

/* No fixup needed */
#define UPROBES_FIX_NONE	0x0
/* Adjust IP back to vicinity of actual insn */
#define UPROBES_FIX_IP		0x1
/* Adjust the return address of a call insn */
#define UPROBES_FIX_CALL	0x2

/*
 * Set by handle_riprel_insn() when a rip-relative instruction has been
 * rewritten to address memory through a scratch register instead:
 * RIP_AX when %rax is the scratch register, RIP_CX when %rcx is.
 */
#define UPROBES_FIX_RIP_AX	0x8000
#define UPROBES_FIX_RIP_CX	0x4000

/*
 * Adaptations for mhiramat x86 decoder v14.
 *
 * Each macro takes a pointer to a decoded instruction.  OPCODE1..3 yield
 * the first, second and third opcode byte; MODRM_REG yields the reg field
 * of the modrm byte.  The argument is parenthesized everywhere so that an
 * expression such as &insn can be passed safely.
 */
#define OPCODE1(insn) ((insn)->opcode.bytes[0])
#define OPCODE2(insn) ((insn)->opcode.bytes[1])
#define OPCODE3(insn) ((insn)->opcode.bytes[2])
#define MODRM_REG(insn) X86_MODRM_REG((insn)->modrm.value)

/*
 * W() - build one 16-opcode row of a good-instruction bitmap.
 * @row: opcode value of the first entry in the row (0x00, 0x10, ... 0xf0).
 * @b0 .. @bf: literal 1 if the corresponding opcode is acceptable, else 0.
 *	These are token-pasted with a UL suffix, so they must be plain
 *	integer literals, not expressions.
 *
 * Before the final shift, bit k of the value corresponds to opcode
 * row + k.  The whole row is then shifted left by (row % 32), so two
 * consecutive rows can be OR'ed together into one 32-bit table element.
 * @row is parenthesized so that an expression argument cannot change the
 * shift count through operator precedence.
 */
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
	 << ((row) % 32))

#ifdef CONFIG_X86_64
/*
 * Bitmap of one-byte opcodes accepted by validate_insn_64bits(): bit N is
 * set when first opcode byte N may be probed.  Each W() row describes 16
 * opcodes, and each pair of rows is OR'ed together to form one u32
 * element of the array.
 */
static volatile u32 good_insns_64[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
	W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
	W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
	W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
	W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
	W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};
#endif

/*
 * Good-instruction table for 32-bit apps.
 *
 * Bitmap of one-byte opcodes accepted by validate_insn_32bits(): bit N is
 * set when first opcode byte N may be probed.  Each W() row describes 16
 * opcodes, and each pair of rows is OR'ed together to form one u32
 * element of the array.
 */

static volatile u32 good_insns_32[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
	W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */
	W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
	W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */
	W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */
	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
	W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};

/*
 * Using this for both 64-bit and 32-bit apps.
 *
 * Bitmap indexed by the second opcode byte; the validators consult it only
 * when the decoder reports a two-byte opcode (opcode.nbytes == 2) and the
 * first byte was not accepted by the one-byte table.  Layout is the same
 * as the one-byte tables: bit N set means second byte N is acceptable.
 */
static volatile u32 good_2byte_insns[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
	W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
	W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
	W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
	W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* f0 */
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};

/* The row-building macro is only needed for the tables above. */
#undef W

/*
 * opcodes we'll probably never support:
 * 6c-6d, e4-e5, ec-ed - in
 * 6e-6f, e6-e7, ee-ef - out
 * cc, cd - int3, int
 * cf - iret
 * d6 - illegal instruction
 * f1 - int1/icebp
 * f4 - hlt
 * fa, fb - cli, sti
 * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
 *
 * invalid opcodes in 64-bit mode:
 * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
 *
 * 63 - we support this opcode in x86_64 but not in i386.
 *
 * opcodes we may need to refine support for:
 * 0f - 2-byte instructions: For many of these instructions, the validity
 * depends on the prefix and/or the reg field.  On such instructions, we
 * just consider the opcode combination valid if it corresponds to any
 * valid instruction.
 * 8f - Group 1 - only reg = 0 is OK
 * c6-c7 - Group 11 - only reg = 0 is OK
 * d9-df - fpu insns with some illegal encodings
 * f2, f3 - repnz, repz prefixes.  These are also the first byte for
 * certain floating-point instructions, such as addsd.
 * fe - Group 4 - only reg = 0 or 1 is OK
 * ff - Group 5 - only reg = 0-6 is OK
 *
 * others -- Do we need to support these?
 * 0f - (floating-point?) prefetch instructions
 * 07, 17, 1f - pop es, pop ss, pop ds
 * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
 *	but 64 and 65 (fs: and gs:) seem to be used, so we support them
 * 67 - addr16 prefix
 * ce - into
 * f0 - lock prefix
 */

/*
 * TODO:
 * - Where necessary, examine the modrm byte and allow only valid instructions
 * in the different Groups and fpu instructions.
 */

/*
 * is_prefix_bad - check for instruction prefixes we refuse to probe.
 * @insn: instruction whose prefixes have already been decoded.
 *
 * Return: true if any stored prefix byte is an es:, cs:, ss: or ds:
 * segment override or a lock prefix; false otherwise.
 *
 * The x86 instruction decoder counts repeated prefixes in prefixes.nbytes
 * but stores only distinct ones in prefixes.bytes[], so nbytes may be
 * larger than the array.  The walk is therefore bounded by the array size
 * as well, to avoid reading past its end.
 */
static bool is_prefix_bad(struct insn *insn)
{
	const unsigned int stored = sizeof(insn->prefixes.bytes) /
				    sizeof(insn->prefixes.bytes[0]);
	unsigned int count = insn->prefixes.nbytes;
	unsigned int i;

	if (count > stored)
		count = stored;

	for (i = 0; i < count; i++) {
		switch (insn->prefixes.bytes[i]) {
		case 0x26:	/* es: segment override */
		case 0x2E:	/* cs: segment override */
		case 0x36:	/* ss: segment override */
		case 0x3E:	/* ds: segment override */
		case 0xF0:	/* lock */
			return true;
		}
	}
	return false;
}

static int validate_insn_32bits(struct uprobe *uprobe, struct insn *insn)
{
	insn_init(insn, uprobe->insn, false);

	/* Skip good instruction prefixes; reject "bad" ones. */
	insn_get_opcode(insn);
	if (is_prefix_bad(insn))
		return -ENOTSUPP;
	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
		return 0;
	if (insn->opcode.nbytes == 2) {
		if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
			return 0;
	}
	return -ENOTSUPP;
}

/*
 * Figure out which fixups post_xol() will need to perform, and annotate
 * uprobe->arch_info.fixups accordingly.  To start with,
 * uprobe->arch_info.fixups is either zero or it reflects rip-related
 * fixups.
 */
static void prepare_fixups(struct uprobe *uprobe, struct insn *insn)
{
	bool fix_ip = true, fix_call = false;	/* defaults */
	int reg;

	insn_get_opcode(insn);	/* should be a nop */

	switch (OPCODE1(insn)) {
	case 0xc3:		/* ret/lret */
	case 0xcb:
	case 0xc2:
	case 0xca:
		/* ip is correct */
		fix_ip = false;
		break;
	case 0xe8:		/* call relative - Fix return addr */
		fix_call = true;
		break;
	case 0x9a:		/* call absolute - Fix return addr, not ip */
		fix_call = true;
		fix_ip = false;
		break;
	case 0xff:
		insn_get_modrm(insn);
		reg = MODRM_REG(insn);
		if (reg == 2 || reg == 3) {
			/* call or lcall, indirect */
			/* Fix return addr; ip is correct. */
			fix_call = true;
			fix_ip = false;
		} else if (reg == 4 || reg == 5) {
			/* jmp or ljmp, indirect */
			/* ip is correct. */
			fix_ip = false;
		}
		break;
	case 0xea:		/* jmp absolute -- ip is correct */
		fix_ip = false;
		break;
	default:
		break;
	}
	if (fix_ip)
		uprobe->arch_info.fixups |= UPROBES_FIX_IP;
	if (fix_call)
		uprobe->arch_info.fixups |= UPROBES_FIX_CALL;
}

#ifdef CONFIG_X86_64
/*
 * handle_riprel_insn - rewrite a rip-relative insn to use a scratch register.
 * @mm: the probed address space; nothing is done if it is an ia32 compat mm.
 * @uprobe: probepoint; its insn bytes, arch_info.fixups and
 *	arch_info.rip_rela_target_address may be modified.
 * @insn: decoder state for uprobe->insn, already initialised by the caller.
 *
 * If uprobe->insn doesn't use rip-relative addressing, return
 * immediately.  Otherwise, rewrite the instruction so that it accesses
 * its memory operand indirectly through a scratch register.  Set
 * uprobe->arch_info.fixups and uprobe->arch_info.rip_rela_target_address
 * accordingly.  (The contents of the scratch register will be saved
 * before we single-step the modified instruction, and restored
 * afterward.)
 *
 * We do this because a rip-relative instruction can access only a
 * relatively small area (+/- 2 GB from the instruction), and the XOL
 * area typically lies beyond that area.  At least for instructions
 * that store to memory, we can't execute the original instruction
 * and "fix things up" later, because the misdirected store could be
 * disastrous.
 *
 * Some useful facts about rip-relative instructions:
 * - There's always a modrm byte.
 * - There's never a SIB byte.
 * - The displacement is always 4 bytes.
 */
static void handle_riprel_insn(struct mm_struct *mm, struct uprobe *uprobe,
							struct insn *insn)
{
	u8 *cursor;
	u8 reg;

	/* Compat (32-bit) tasks are left untouched. */
	if (mm->context.ia32_compat)
		return;

	uprobe->arch_info.rip_rela_target_address = 0x0;
	if (!insn_rip_relative(insn))
		return;

	/*
	 * insn_rip_relative() would have decoded rex_prefix, modrm.
	 * Clear REX.b bit (extension of MODRM.rm field):
	 * we want to encode rax/rcx, not r8/r9.
	 */
	if (insn->rex_prefix.nbytes) {
		cursor = uprobe->insn + insn_offset_rex_prefix(insn);
		*cursor &= 0xfe;	/* Clearing REX.B bit */
	}

	/*
	 * Point cursor at the modrm byte.  The next 4 bytes are the
	 * displacement.  Beyond the displacement, for some instructions,
	 * is the immediate operand.
	 *
	 * insn_get_length() is called here so that insn->length,
	 * insn->displacement and insn->immediate can be read below.
	 */
	cursor = uprobe->insn + insn_offset_modrm(insn);
	insn_get_length(insn);

	/*
	 * Convert from rip-relative addressing to indirect addressing
	 * via a scratch register.  Change the r/m field from 0x5 (%rip)
	 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
	 */
	reg = MODRM_REG(insn);
	if (reg == 0) {
		/*
		 * The register operand (if any) is either the A register
		 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
		 * REX prefix) %r8.  In any case, we know the C register
		 * is NOT the register operand, so we use %rcx (register
		 * #1) for the scratch register.
		 */
		uprobe->arch_info.fixups = UPROBES_FIX_RIP_CX;
		/* Change modrm from 00 000 101 to 00 000 001. */
		*cursor = 0x1;
	} else {
		/* Use %rax (register #0) for the scratch register. */
		uprobe->arch_info.fixups = UPROBES_FIX_RIP_AX;
		/* Change modrm from 00 xxx 101 to 00 xxx 000 */
		*cursor = (reg << 3);
	}

	/*
	 * Target address = address of next instruction + (signed) offset.
	 * NOTE(review): the value stored here is insn length plus
	 * displacement, i.e. relative to the start of the instruction;
	 * presumably the probed address is added by the consumer -- confirm.
	 */
	uprobe->arch_info.rip_rela_target_address = (long)insn->length
					+ insn->displacement.value;
	/* Displacement field is gone; slide immediate field (if any) over. */
	if (insn->immediate.nbytes) {
		/* Step past the modrm byte to where the displacement began. */
		cursor++;
		memmove(cursor, cursor + insn->displacement.nbytes,
						insn->immediate.nbytes);
	}
	return;
}

static int validate_insn_64bits(struct uprobe *uprobe, struct insn *insn)
{
	insn_init(insn, uprobe->insn, true);

	/* Skip good instruction prefixes; reject "bad" ones. */
	insn_get_opcode(insn);
	if (is_prefix_bad(insn))
		return -ENOTSUPP;
	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
		return 0;
	if (insn->opcode.nbytes == 2) {
		if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
			return 0;
	}
	return -ENOTSUPP;
}

/*
 * Pick the validator matching the probed address space: the 32-bit one
 * when mm->context.ia32_compat is set, the 64-bit one otherwise.
 * Returns whatever the chosen validator returns.
 */
static int validate_insn_bits(struct mm_struct *mm, struct uprobe *uprobe,
				struct insn *insn)
{
	return mm->context.ia32_compat ?
		validate_insn_32bits(uprobe, insn) :
		validate_insn_64bits(uprobe, insn);
}
#else
/* Variant built without CONFIG_X86_64: intentionally does nothing. */
static void handle_riprel_insn(struct mm_struct *mm, struct uprobe *uprobe,
							struct insn *insn)
{
}

/*
 * Variant built without CONFIG_X86_64: @mm is not consulted and the
 * 32-bit validator is always used.
 */
static int validate_insn_bits(struct mm_struct *mm, struct uprobe *uprobe,
				struct insn *insn)
{
	int ret = validate_insn_32bits(uprobe, insn);

	return ret;
}
#endif /* CONFIG_X86_64 */

/**
 * analyze_insn - validate the probed instruction and work out its fixups.
 * @mm: the probed address space.
 * @uprobe: the probepoint information; arch_info.fixups is rewritten.
 *
 * Return 0 on success, or the negative error code from validation.
 */
int analyze_insn(struct mm_struct *mm, struct uprobe *uprobe)
{
	struct insn insn;
	int err;

	uprobe->arch_info.fixups = 0;

	err = validate_insn_bits(mm, uprobe, &insn);
	if (err)
		return err;

	/*
	 * Order matters: handle_riprel_insn() assigns the rip-related
	 * flags, and prepare_fixups() then ORs the remaining ones in.
	 */
	handle_riprel_insn(mm, uprobe, &insn);
	prepare_fixups(uprobe, &insn);

	return 0;
}
Commit	Line	Data
2b144498 SD	1	/*
	2	* Userspace Probes (UProbes) for x86
	3	*
	4	* This program is free software; you can redistribute it and/or modify
	5	* it under the terms of the GNU General Public License as published by
	6	* the Free Software Foundation; either version 2 of the License, or
	7	* (at your option) any later version.
	8	*
	9	* This program is distributed in the hope that it will be useful,
	10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	* GNU General Public License for more details.
	13	*
	14	* You should have received a copy of the GNU General Public License
	15	* along with this program; if not, write to the Free Software
	16	* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	17	*
	18	* Copyright (C) IBM Corporation, 2008-2011
	19	* Authors:
	20	* Srikar Dronamraju
	21	* Jim Keniston
	22	*/
	23
	24	#include <linux/kernel.h>
	25	#include <linux/sched.h>
	26	#include <linux/ptrace.h>
	27	#include <linux/uprobes.h>
	28
	29	#include <linux/kdebug.h>
	30	#include <asm/insn.h>
	31
	32	/* Post-execution fixups. */
	33
	34	/* No fixup needed */
	35	#define UPROBES_FIX_NONE 0x0
	36	/* Adjust IP back to vicinity of actual insn */
	37	#define UPROBES_FIX_IP 0x1
	38	/* Adjust the return address of a call insn */
	39	#define UPROBES_FIX_CALL 0x2
	40
	41	#define UPROBES_FIX_RIP_AX 0x8000
	42	#define UPROBES_FIX_RIP_CX 0x4000
	43
	44	/* Adaptations for mhiramat x86 decoder v14. */
	45	#define OPCODE1(insn) ((insn)->opcode.bytes[0])
	46	#define OPCODE2(insn) ((insn)->opcode.bytes[1])
	47	#define OPCODE3(insn) ((insn)->opcode.bytes[2])
	48	#define MODRM_REG(insn) X86_MODRM_REG(insn->modrm.value)
	49
	50	#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
	51	(((b0##UL << 0x0)\|(b1##UL << 0x1)\|(b2##UL << 0x2)\|(b3##UL << 0x3) \| \
	52	(b4##UL << 0x4)\|(b5##UL << 0x5)\|(b6##UL << 0x6)\|(b7##UL << 0x7) \| \
	53	(b8##UL << 0x8)\|(b9##UL << 0x9)\|(ba##UL << 0xa)\|(bb##UL << 0xb) \| \
	54	(bc##UL << 0xc)\|(bd##UL << 0xd)\|(be##UL << 0xe)\|(bf##UL << 0xf)) \
	55	<< (row % 32))
	56
	57	#ifdef CONFIG_X86_64
	58	static volatile u32 good_insns_64[256 / 32] = {
	59	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
	60	/* ---------------------------------------------- */
	61	W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) \| /* 00 */
	62	W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
	63	W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) \| /* 20 */
	64	W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
65	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) \| /* 40 */
66	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
67	W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) \| /* 60 */
68	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
69	W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* 80 */
70	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
71	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* a0 */
72	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
73	W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) \| /* c0 */
74	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
75	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) \| /* e0 */
76	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
77	/* ---------------------------------------------- */
78	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
79	};
80	#endif
81
82	/* Good-instruction tables for 32-bit apps */
83
84	static volatile u32 good_insns_32[256 / 32] = {
85	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
86	/* ---------------------------------------------- */
87	W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) \| /* 00 */
88	W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
89	W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) \| /* 20 */
90	W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */
91	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* 40 */
92	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
93	W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) \| /* 60 */
94	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
95	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* 80 */
96	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
97	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* a0 */
98	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
99	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) \| /* c0 */
100	W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
101	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) \| /* e0 */
102	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
103	/* ---------------------------------------------- */
104	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
105	};
106
107	/* Using this for both 64-bit and 32-bit apps */
108	static volatile u32 good_2byte_insns[256 / 32] = {
109	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
110	/* ---------------------------------------------- */
111	W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) \| /* 00 */
112	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
113	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) \| /* 20 */
114	W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
115	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* 40 */
116	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
117	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* 60 */
118	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
119	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* 80 */
120	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
121	W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) \| /* a0 */
122	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
123	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* c0 */
124	W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
125	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) \| /* e0 */
126	W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */
127	/* ---------------------------------------------- */
128	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
129	};
130
131	#undef W
132
133	/*
134	* opcodes we'll probably never support:
135	* 6c-6d, e4-e5, ec-ed - in
136	* 6e-6f, e6-e7, ee-ef - out
137	* cc, cd - int3, int
138	* cf - iret
139	* d6 - illegal instruction
140	* f1 - int1/icebp
141	* f4 - hlt
142	* fa, fb - cli, sti
143	* 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
144	*
145	* invalid opcodes in 64-bit mode:
146	* 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
147	*
148	* 63 - we support this opcode in x86_64 but not in i386.
149	*
150	* opcodes we may need to refine support for:
151	* 0f - 2-byte instructions: For many of these instructions, the validity
152	* depends on the prefix and/or the reg field. On such instructions, we
153	* just consider the opcode combination valid if it corresponds to any
154	* valid instruction.
155	* 8f - Group 1 - only reg = 0 is OK
156	* c6-c7 - Group 11 - only reg = 0 is OK
157	* d9-df - fpu insns with some illegal encodings
158	* f2, f3 - repnz, repz prefixes. These are also the first byte for
159	* certain floating-point instructions, such as addsd.
160	* fe - Group 4 - only reg = 0 or 1 is OK
161	* ff - Group 5 - only reg = 0-6 is OK
162	*
163	* others -- Do we need to support these?
164	* 0f - (floating-point?) prefetch instructions
165	* 07, 17, 1f - pop es, pop ss, pop ds
166	* 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
167	* but 64 and 65 (fs: and gs:) seem to be used, so we support them
168	* 67 - addr16 prefix
169	* ce - into
170	* f0 - lock prefix
171	*/
172
173	/*
174	* TODO:
175	* - Where necessary, examine the modrm byte and allow only valid instructions
176	* in the different Groups and fpu instructions.
177	*/
178
179	static bool is_prefix_bad(struct insn *insn)
180	{
181	int i;
182
183	for (i = 0; i < insn->prefixes.nbytes; i++) {
184	switch (insn->prefixes.bytes[i]) {
	185	case 0x26: /*INAT_PFX_ES */
	186	case 0x2E: /*INAT_PFX_CS */
	187	case 0x36: /*INAT_PFX_DS */
	188	case 0x3E: /*INAT_PFX_SS */
	189	case 0xF0: /*INAT_PFX_LOCK */
190	return true;
191	}
192	}
193	return false;
194	}
195
	196	static int validate_insn_32bits(struct uprobe *uprobe, struct insn *insn)
197	{
198	insn_init(insn, uprobe->insn, false);
199
200	/* Skip good instruction prefixes; reject "bad" ones. */
201	insn_get_opcode(insn);
202	if (is_prefix_bad(insn))
203	return -ENOTSUPP;
204	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
205	return 0;
206	if (insn->opcode.nbytes == 2) {
207	if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
208	return 0;
209	}
210	return -ENOTSUPP;
211	}
212
213	/*
214	* Figure out which fixups post_xol() will need to perform, and annotate
215	* uprobe->arch_info.fixups accordingly. To start with,
216	* uprobe->arch_info.fixups is either zero or it reflects rip-related
217	* fixups.
218	*/
	219	static void prepare_fixups(struct uprobe *uprobe, struct insn *insn)
220	{
221	bool fix_ip = true, fix_call = false; /* defaults */
222	int reg;
223
224	insn_get_opcode(insn); /* should be a nop */
225
226	switch (OPCODE1(insn)) {
227	case 0xc3: /* ret/lret */
228	case 0xcb:
229	case 0xc2:
230	case 0xca:
231	/* ip is correct */
232	fix_ip = false;
233	break;
234	case 0xe8: /* call relative - Fix return addr */
235	fix_call = true;
236	break;
237	case 0x9a: /* call absolute - Fix return addr, not ip */
238	fix_call = true;
239	fix_ip = false;
240	break;
241	case 0xff:
242	insn_get_modrm(insn);
243	reg = MODRM_REG(insn);
244	if (reg == 2 \|\| reg == 3) {
245	/* call or lcall, indirect */
246	/* Fix return addr; ip is correct. */
247	fix_call = true;
248	fix_ip = false;
249	} else if (reg == 4 \|\| reg == 5) {
250	/* jmp or ljmp, indirect */
251	/* ip is correct. */
252	fix_ip = false;
253	}
254	break;
255	case 0xea: /* jmp absolute -- ip is correct */
256	fix_ip = false;
257	break;
258	default:
259	break;
260	}
261	if (fix_ip)
262	uprobe->arch_info.fixups \|= UPROBES_FIX_IP;
263	if (fix_call)
264	uprobe->arch_info.fixups \|= UPROBES_FIX_CALL;
265	}
266
267	#ifdef CONFIG_X86_64
268	/*
269	* If uprobe->insn doesn't use rip-relative addressing, return
270	* immediately. Otherwise, rewrite the instruction so that it accesses
271	* its memory operand indirectly through a scratch register. Set
272	* uprobe->arch_info.fixups and uprobe->arch_info.rip_rela_target_address
273	* accordingly. (The contents of the scratch register will be saved
274	* before we single-step the modified instruction, and restored
275	* afterward.)
276	*
277	* We do this because a rip-relative instruction can access only a
278	* relatively small area (+/- 2 GB from the instruction), and the XOL
279	* area typically lies beyond that area. At least for instructions
280	* that store to memory, we can't execute the original instruction
281	* and "fix things up" later, because the misdirected store could be
282	* disastrous.
283	*
284	* Some useful facts about rip-relative instructions:
285	* - There's always a modrm byte.
286	* - There's never a SIB byte.
287	* - The displacement is always 4 bytes.
288	*/
	289	static void handle_riprel_insn(struct mm_struct *mm, struct uprobe *uprobe,
290	struct insn *insn)
291	{
292	u8 *cursor;
293	u8 reg;
294
295	if (mm->context.ia32_compat)
296	return;
297
298	uprobe->arch_info.rip_rela_target_address = 0x0;
299	if (!insn_rip_relative(insn))
300	return;
301
302	/*
303	* insn_rip_relative() would have decoded rex_prefix, modrm.
304	* Clear REX.b bit (extension of MODRM.rm field):
305	* we want to encode rax/rcx, not r8/r9.
306	*/
307	if (insn->rex_prefix.nbytes) {
308	cursor = uprobe->insn + insn_offset_rex_prefix(insn);
	309	*cursor &= 0xfe; /* Clearing REX.B bit */
310	}
311
312	/*
313	* Point cursor at the modrm byte. The next 4 bytes are the
314	* displacement. Beyond the displacement, for some instructions,
315	* is the immediate operand.
316	*/
317	cursor = uprobe->insn + insn_offset_modrm(insn);
318	insn_get_length(insn);
319
320	/*
321	* Convert from rip-relative addressing to indirect addressing
322	* via a scratch register. Change the r/m field from 0x5 (%rip)
323	* to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
324	*/
325	reg = MODRM_REG(insn);
326	if (reg == 0) {
327	/*
328	* The register operand (if any) is either the A register
329	* (%rax, %eax, etc.) or (if the 0x4 bit is set in the
330	* REX prefix) %r8. In any case, we know the C register
331	* is NOT the register operand, so we use %rcx (register
332	* #1) for the scratch register.
333	*/
334	uprobe->arch_info.fixups = UPROBES_FIX_RIP_CX;
335	/* Change modrm from 00 000 101 to 00 000 001. */
336	*cursor = 0x1;
337	} else {
338	/* Use %rax (register #0) for the scratch register. */
339	uprobe->arch_info.fixups = UPROBES_FIX_RIP_AX;
340	/* Change modrm from 00 xxx 101 to 00 xxx 000 */
341	*cursor = (reg << 3);
342	}
343
344	/* Target address = address of next instruction + (signed) offset */
345	uprobe->arch_info.rip_rela_target_address = (long)insn->length
346	+ insn->displacement.value;
347	/* Displacement field is gone; slide immediate field (if any) over. */
348	if (insn->immediate.nbytes) {
349	cursor++;
350	memmove(cursor, cursor + insn->displacement.nbytes,
351	insn->immediate.nbytes);
352	}
353	return;
354	}
355
	356	static int validate_insn_64bits(struct uprobe *uprobe, struct insn *insn)
357	{
358	insn_init(insn, uprobe->insn, true);
359
360	/* Skip good instruction prefixes; reject "bad" ones. */
361	insn_get_opcode(insn);
362	if (is_prefix_bad(insn))
363	return -ENOTSUPP;
364	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
365	return 0;
366	if (insn->opcode.nbytes == 2) {
367	if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
368	return 0;
369	}
370	return -ENOTSUPP;
371	}
372
	373	static int validate_insn_bits(struct mm_struct *mm, struct uprobe *uprobe,
374	struct insn *insn)
375	{
376	if (mm->context.ia32_compat)
377	return validate_insn_32bits(uprobe, insn);
378	return validate_insn_64bits(uprobe, insn);
379	}
380	#else
	381	static void handle_riprel_insn(struct mm_struct *mm, struct uprobe *uprobe,
382	struct insn *insn)
383	{
384	return;
385	}
386
	387	static int validate_insn_bits(struct mm_struct *mm, struct uprobe *uprobe,
388	struct insn *insn)
389	{
390	return validate_insn_32bits(uprobe, insn);
391	}
392	#endif /* CONFIG_X86_64 */
393
394	/**
395	* analyze_insn - instruction analysis including validity and fixups.
396	* @mm: the probed address space.
397	* @uprobe: the probepoint information.
398	* Return 0 on success or a -ve number on error.
399	*/
	400	int analyze_insn(struct mm_struct *mm, struct uprobe *uprobe)
401	{
402	int ret;
403	struct insn insn;
404
405	uprobe->arch_info.fixups = 0;
406	ret = validate_insn_bits(mm, uprobe, &insn);
407	if (ret != 0)
408	return ret;
409	handle_riprel_insn(mm, uprobe, &insn);
410	prepare_fixups(uprobe, &insn);
411	return 0;
412	}