]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - arch/x86/kernel/kprobes/opt.c
Merge branch 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[mirror_ubuntu-jammy-kernel.git] / arch / x86 / kernel / kprobes / opt.c
CommitLineData
3f33ab1c
MH
1/*
2 * Kernel Probes Jump Optimization (Optprobes)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2002, 2004
19 * Copyright (C) Hitachi Ltd., 2012
20 */
21#include <linux/kprobes.h>
22#include <linux/ptrace.h>
23#include <linux/string.h>
24#include <linux/slab.h>
25#include <linux/hardirq.h>
26#include <linux/preempt.h>
744c193e 27#include <linux/extable.h>
3f33ab1c
MH
28#include <linux/kdebug.h>
29#include <linux/kallsyms.h>
30#include <linux/ftrace.h>
c207aee4 31#include <linux/frame.h>
3f33ab1c 32
35de5b06 33#include <asm/text-patching.h>
3f33ab1c
MH
34#include <asm/cacheflush.h>
35#include <asm/desc.h>
36#include <asm/pgtable.h>
7c0f6ba6 37#include <linux/uaccess.h>
3f33ab1c
MH
38#include <asm/alternative.h>
39#include <asm/insn.h>
40#include <asm/debugreg.h>
e6ccbff0 41#include <asm/set_memory.h>
d9f5f32a 42#include <asm/sections.h>
c86a32c0 43#include <asm/nospec-branch.h>
3f33ab1c 44
f684199f 45#include "common.h"
3f33ab1c
MH
46
47unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
48{
49 struct optimized_kprobe *op;
50 struct kprobe *kp;
51 long offs;
52 int i;
53
54 for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
55 kp = get_kprobe((void *)addr - i);
56 /* This function only handles jump-optimized kprobe */
57 if (kp && kprobe_optimized(kp)) {
58 op = container_of(kp, struct optimized_kprobe, kp);
59 /* If op->list is not empty, op is under optimizing */
60 if (list_empty(&op->list))
61 goto found;
62 }
63 }
64
65 return addr;
66found:
67 /*
68 * If the kprobe can be optimized, original bytes which can be
69 * overwritten by jump destination address. In this case, original
70 * bytes must be recovered from op->optinsn.copied_insn buffer.
71 */
ea1e34fc
MH
72 if (probe_kernel_read(buf, (void *)addr,
73 MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
74 return 0UL;
75
3f33ab1c
MH
76 if (addr == (unsigned long)kp->addr) {
77 buf[0] = kp->opcode;
78 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
79 } else {
80 offs = addr - (unsigned long)kp->addr - 1;
81 memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
82 }
83
84 return (unsigned long)buf;
85}
86
87/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
7ec8a97a 88static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
3f33ab1c
MH
89{
90#ifdef CONFIG_X86_64
91 *addr++ = 0x48;
92 *addr++ = 0xbf;
93#else
94 *addr++ = 0xb8;
95#endif
96 *(unsigned long *)addr = val;
97}
98
04bb591c 99asm (
c207aee4 100 "optprobe_template_func:\n"
3f33ab1c
MH
101 ".global optprobe_template_entry\n"
102 "optprobe_template_entry:\n"
103#ifdef CONFIG_X86_64
104 /* We don't bother saving the ss register */
105 " pushq %rsp\n"
106 " pushfq\n"
107 SAVE_REGS_STRING
108 " movq %rsp, %rsi\n"
109 ".global optprobe_template_val\n"
110 "optprobe_template_val:\n"
111 ASM_NOP5
112 ASM_NOP5
113 ".global optprobe_template_call\n"
114 "optprobe_template_call:\n"
115 ASM_NOP5
116 /* Move flags to rsp */
117 " movq 144(%rsp), %rdx\n"
118 " movq %rdx, 152(%rsp)\n"
119 RESTORE_REGS_STRING
120 /* Skip flags entry */
121 " addq $8, %rsp\n"
122 " popfq\n"
123#else /* CONFIG_X86_32 */
124 " pushf\n"
125 SAVE_REGS_STRING
126 " movl %esp, %edx\n"
127 ".global optprobe_template_val\n"
128 "optprobe_template_val:\n"
129 ASM_NOP5
130 ".global optprobe_template_call\n"
131 "optprobe_template_call:\n"
132 ASM_NOP5
133 RESTORE_REGS_STRING
134 " addl $4, %esp\n" /* skip cs */
135 " popf\n"
136#endif
137 ".global optprobe_template_end\n"
c207aee4
JP
138 "optprobe_template_end:\n"
139 ".type optprobe_template_func, @function\n"
140 ".size optprobe_template_func, .-optprobe_template_func\n");
141
142void optprobe_template_func(void);
143STACK_FRAME_NON_STANDARD(optprobe_template_func);
3f33ab1c
MH
144
/* Byte offsets of the patch points, relative to optprobe_template_entry */
#define TMPL_MOVE_IDX	((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX	((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX	((long)optprobe_template_end - (long)optprobe_template_entry)

/* Size of the breakpoint instruction: one kprobe opcode unit */
#define INT3_SIZE sizeof(kprobe_opcode_t)
153
154/* Optimized kprobe call back function: called from optinsn */
9326638c
MH
155static void
156optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
3f33ab1c 157{
3f33ab1c
MH
158 /* This is possible if op is under delayed unoptimizing */
159 if (kprobe_disabled(&op->kp))
160 return;
161
9a09f261 162 preempt_disable();
3f33ab1c
MH
163 if (kprobe_running()) {
164 kprobes_inc_nmissed_count(&op->kp);
165 } else {
cd52edad 166 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
3f33ab1c
MH
167 /* Save skipped registers */
168#ifdef CONFIG_X86_64
169 regs->cs = __KERNEL_CS;
170#else
171 regs->cs = __KERNEL_CS | get_kernel_rpl();
172 regs->gs = 0;
173#endif
174 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
175 regs->orig_ax = ~0UL;
176
177 __this_cpu_write(current_kprobe, &op->kp);
178 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
179 opt_pre_handler(&op->kp, regs);
180 __this_cpu_write(current_kprobe, NULL);
181 }
9a09f261 182 preempt_enable_no_resched();
3f33ab1c 183}
9326638c 184NOKPROBE_SYMBOL(optimized_callback);
3f33ab1c 185
63fef14f 186static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
3f33ab1c 187{
a8d11cd0 188 struct insn insn;
3f33ab1c
MH
189 int len = 0, ret;
190
191 while (len < RELATIVEJUMP_SIZE) {
63fef14f 192 ret = __copy_instruction(dest + len, src + len, real, &insn);
a8d11cd0 193 if (!ret || !can_boost(&insn, src + len))
3f33ab1c
MH
194 return -EINVAL;
195 len += ret;
196 }
197 /* Check whether the address range is reserved */
198 if (ftrace_text_reserved(src, src + len - 1) ||
199 alternatives_text_reserved(src, src + len - 1) ||
200 jump_label_text_reserved(src, src + len - 1))
201 return -EBUSY;
202
203 return len;
204}
205
206/* Check whether insn is indirect jump */
c86a32c0 207static int __insn_is_indirect_jump(struct insn *insn)
3f33ab1c
MH
208{
209 return ((insn->opcode.bytes[0] == 0xff &&
210 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
211 insn->opcode.bytes[0] == 0xea); /* Segment based jump */
212}
213
214/* Check whether insn jumps into specified address range */
215static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
216{
217 unsigned long target = 0;
218
219 switch (insn->opcode.bytes[0]) {
220 case 0xe0: /* loopne */
221 case 0xe1: /* loope */
222 case 0xe2: /* loop */
223 case 0xe3: /* jcxz */
224 case 0xe9: /* near relative jump */
225 case 0xeb: /* short relative jump */
226 break;
227 case 0x0f:
228 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
229 break;
230 return 0;
231 default:
232 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
233 break;
234 return 0;
235 }
236 target = (unsigned long)insn->next_byte + insn->immediate.value;
237
238 return (start <= target && target <= start + len);
239}
240
c86a32c0
MH
/*
 * Return non-zero if @insn must be treated as an indirect jump.  With
 * CONFIG_RETPOLINE this also covers relative jumps into the indirect
 * thunk area.
 */
static int insn_is_indirect_jump(struct insn *insn)
{
	int indirect = __insn_is_indirect_jump(insn);

#ifdef CONFIG_RETPOLINE
	/*
	 * A jump to x86_indirect_thunk_* counts as an indirect jump.  This
	 * is checked in addition to, not instead of, the plain indirect
	 * jump test, because a kernel built with an older gcc may still
	 * contain real indirect jumps even with CONFIG_RETPOLINE=y.
	 */
	if (!indirect) {
		unsigned long thunk_start = (unsigned long)__indirect_thunk_start;
		unsigned long thunk_end = (unsigned long)__indirect_thunk_end;

		indirect = insn_jump_into_range(insn, thunk_start,
						thunk_end - thunk_start);
	}
#endif
	return indirect;
}
260
3f33ab1c 261/* Decode whole function to ensure any instructions don't jump into target */
7ec8a97a 262static int can_optimize(unsigned long paddr)
3f33ab1c
MH
263{
264 unsigned long addr, size = 0, offset = 0;
265 struct insn insn;
266 kprobe_opcode_t buf[MAX_INSN_SIZE];
267
268 /* Lookup symbol including addr */
269 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
270 return 0;
271
272 /*
273 * Do not optimize in the entry code due to the unstable
d9f5f32a 274 * stack handling and registers setup.
3f33ab1c 275 */
d9f5f32a
MH
276 if (((paddr >= (unsigned long)__entry_text_start) &&
277 (paddr < (unsigned long)__entry_text_end)) ||
278 ((paddr >= (unsigned long)__irqentry_text_start) &&
279 (paddr < (unsigned long)__irqentry_text_end)))
3f33ab1c
MH
280 return 0;
281
282 /* Check there is enough space for a relative jump. */
283 if (size - offset < RELATIVEJUMP_SIZE)
284 return 0;
285
286 /* Decode instructions */
287 addr = paddr - offset;
288 while (addr < paddr - offset + size) { /* Decode until function end */
6ba48ff4 289 unsigned long recovered_insn;
3f33ab1c
MH
290 if (search_exception_tables(addr))
291 /*
292 * Since some fixup code will jumps into this function,
293 * we can't optimize kprobe in this function.
294 */
295 return 0;
6ba48ff4 296 recovered_insn = recover_probed_instruction(buf, addr);
2a6730c8
PM
297 if (!recovered_insn)
298 return 0;
6ba48ff4 299 kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
3f33ab1c
MH
300 insn_get_length(&insn);
301 /* Another subsystem puts a breakpoint */
302 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
303 return 0;
304 /* Recover address */
305 insn.kaddr = (void *)addr;
306 insn.next_byte = (void *)(addr + insn.length);
307 /* Check any instructions don't jump into target */
308 if (insn_is_indirect_jump(&insn) ||
309 insn_jump_into_range(&insn, paddr + INT3_SIZE,
310 RELATIVE_ADDR_SIZE))
311 return 0;
312 addr += insn.length;
313 }
314
315 return 1;
316}
317
318/* Check optimized_kprobe can actually be optimized. */
7ec8a97a 319int arch_check_optimized_kprobe(struct optimized_kprobe *op)
3f33ab1c
MH
320{
321 int i;
322 struct kprobe *p;
323
324 for (i = 1; i < op->optinsn.size; i++) {
325 p = get_kprobe(op->kp.addr + i);
326 if (p && !kprobe_disabled(p))
327 return -EEXIST;
328 }
329
330 return 0;
331}
332
333/* Check the addr is within the optimized instructions. */
7ec8a97a
MH
334int arch_within_optimized_kprobe(struct optimized_kprobe *op,
335 unsigned long addr)
3f33ab1c
MH
336{
337 return ((unsigned long)op->kp.addr <= addr &&
338 (unsigned long)op->kp.addr + op->optinsn.size > addr);
339}
340
341/* Free optimized instruction slot */
7ec8a97a 342static
3f33ab1c
MH
343void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
344{
345 if (op->optinsn.insn) {
346 free_optinsn_slot(op->optinsn.insn, dirty);
347 op->optinsn.insn = NULL;
348 op->optinsn.size = 0;
349 }
350}
351
/* Release the instruction slot of @op, freeing it as a dirty slot. */
void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	const int dirty = 1;

	__arch_remove_optimized_kprobe(op, dirty);
}
356
357/*
358 * Copy replacing target instructions
359 * Target instructions MUST be relocatable (checked inside)
360 * This is called when new aggr(opt)probe is allocated or reused.
361 */
cbf6ab52
MH
362int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
363 struct kprobe *__unused)
3f33ab1c 364{
63fef14f
MH
365 u8 *buf = NULL, *slot;
366 int ret, len;
3f33ab1c
MH
367 long rel;
368
369 if (!can_optimize((unsigned long)op->kp.addr))
370 return -EILSEQ;
371
63fef14f
MH
372 buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
373 if (!buf)
3f33ab1c
MH
374 return -ENOMEM;
375
63fef14f
MH
376 op->optinsn.insn = slot = get_optinsn_slot();
377 if (!slot) {
378 ret = -ENOMEM;
379 goto out;
380 }
381
3f33ab1c
MH
382 /*
383 * Verify if the address gap is in 2GB range, because this uses
384 * a relative jump.
385 */
63fef14f 386 rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
256aae5e 387 if (abs(rel) > 0x7fffffff) {
63fef14f
MH
388 ret = -ERANGE;
389 goto err;
256aae5e 390 }
3f33ab1c 391
63fef14f 392 /* Copy arch-dep-instance from template */
a8976fc8 393 memcpy(buf, optprobe_template_entry, TMPL_END_IDX);
3f33ab1c
MH
394
395 /* Copy instructions into the out-of-line buffer */
63fef14f
MH
396 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
397 slot + TMPL_END_IDX);
398 if (ret < 0)
399 goto err;
3f33ab1c 400 op->optinsn.size = ret;
63fef14f 401 len = TMPL_END_IDX + op->optinsn.size;
3f33ab1c
MH
402
403 /* Set probe information */
404 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
405
406 /* Set probe function call */
63fef14f
MH
407 synthesize_relcall(buf + TMPL_CALL_IDX,
408 slot + TMPL_CALL_IDX, optimized_callback);
3f33ab1c
MH
409
410 /* Set returning jmp instruction at the tail of out-of-line buffer */
63fef14f 411 synthesize_reljump(buf + len, slot + len,
3f33ab1c 412 (u8 *)op->kp.addr + op->optinsn.size);
63fef14f
MH
413 len += RELATIVEJUMP_SIZE;
414
415 /* We have to use text_poke for instuction buffer because it is RO */
416 text_poke(slot, buf, len);
417 ret = 0;
418out:
419 kfree(buf);
420 return ret;
421
422err:
423 __arch_remove_optimized_kprobe(op, 0);
424 goto out;
3f33ab1c
MH
425}
426
3f33ab1c
MH
427/*
428 * Replace breakpoints (int3) with relative jumps.
429 * Caller must call with locking kprobe_mutex and text_mutex.
430 */
7ec8a97a 431void arch_optimize_kprobes(struct list_head *oplist)
3f33ab1c
MH
432{
433 struct optimized_kprobe *op, *tmp;
a7b0133e 434 u8 insn_buf[RELATIVEJUMP_SIZE];
3f33ab1c
MH
435
436 list_for_each_entry_safe(op, tmp, oplist, list) {
a7b0133e
MH
437 s32 rel = (s32)((long)op->optinsn.insn -
438 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
439
3f33ab1c 440 WARN_ON(kprobe_disabled(&op->kp));
a7b0133e
MH
441
442 /* Backup instructions which will be replaced by jump address */
443 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
444 RELATIVE_ADDR_SIZE);
445
446 insn_buf[0] = RELATIVEJUMP_OPCODE;
447 *(s32 *)(&insn_buf[1]) = rel;
448
449 text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
450 op->optinsn.insn);
451
3f33ab1c 452 list_del_init(&op->list);
3f33ab1c 453 }
3f33ab1c
MH
454}
455
a7b0133e 456/* Replace a relative jump with a breakpoint (int3). */
7ec8a97a 457void arch_unoptimize_kprobe(struct optimized_kprobe *op)
3f33ab1c 458{
a7b0133e
MH
459 u8 insn_buf[RELATIVEJUMP_SIZE];
460
3f33ab1c
MH
461 /* Set int3 to first byte for kprobes */
462 insn_buf[0] = BREAKPOINT_INSTRUCTION;
463 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
a7b0133e
MH
464 text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
465 op->optinsn.insn);
3f33ab1c
MH
466}
467
468/*
469 * Recover original instructions and breakpoints from relative jumps.
470 * Caller must call with locking kprobe_mutex.
471 */
472extern void arch_unoptimize_kprobes(struct list_head *oplist,
473 struct list_head *done_list)
474{
475 struct optimized_kprobe *op, *tmp;
3f33ab1c
MH
476
477 list_for_each_entry_safe(op, tmp, oplist, list) {
a7b0133e 478 arch_unoptimize_kprobe(op);
3f33ab1c 479 list_move(&op->list, done_list);
3f33ab1c 480 }
3f33ab1c
MH
481}
482
9326638c 483int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
3f33ab1c
MH
484{
485 struct optimized_kprobe *op;
486
487 if (p->flags & KPROBE_FLAG_OPTIMIZED) {
488 /* This kprobe is really able to run optimized path. */
489 op = container_of(p, struct optimized_kprobe, kp);
490 /* Detour through copied instructions */
491 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
492 if (!reenter)
493 reset_current_kprobe();
494 preempt_enable_no_resched();
495 return 1;
496 }
497 return 0;
498}
9326638c 499NOKPROBE_SYMBOL(setup_detour_execution);