]>
Commit | Line | Data |
---|---|---|
ee9f8fce JP |
1 | #include <linux/module.h> |
2 | #include <linux/sort.h> | |
3 | #include <asm/ptrace.h> | |
4 | #include <asm/stacktrace.h> | |
5 | #include <asm/unwind.h> | |
6 | #include <asm/orc_types.h> | |
7 | #include <asm/orc_lookup.h> | |
8 | #include <asm/sections.h> | |
9 | ||
/*
 * Print a warning through printk_deferred_once().  The format string is
 * prefixed with "WARNING: " and passed through pr_fmt().
 */
#define orc_warn(fmt, ...) \
	printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)

/*
 * Start/stop symbols of the two parallel vmlinux ORC tables: the table of
 * self-relative instruction addresses and the table of unwind entries.
 */
extern int __start_orc_unwind_ip[];
extern int __stop_orc_unwind_ip[];
extern struct orc_entry __start_orc_unwind[];
extern struct orc_entry __stop_orc_unwind[];

/* Held while a module's tables are sorted (see unwind_module_init()). */
static DEFINE_MUTEX(sort_mutex);
/*
 * Tables currently being sorted.  The sort callbacks use these to map an
 * .orc_unwind_ip slot to its .orc_unwind entry.  Defaults to the vmlinux
 * tables.
 */
int *cur_orc_ip_table = __start_orc_unwind_ip;
struct orc_entry *cur_orc_table = __start_orc_unwind;

/* Number of slots in orc_lookup[]; set by unwind_init(). */
unsigned int lookup_num_blocks;
/* Set to true at the end of a successful unwind_init(). */
bool orc_init;
24 | ||
/*
 * Decode one .orc_unwind_ip slot.  The stored int is an offset relative to
 * the slot's own address, so the decoded value is that address plus the
 * stored offset.
 */
static inline unsigned long orc_ip(const int *ip)
{
	const unsigned long slot_addr = (unsigned long)ip;

	return slot_addr + *ip;
}
29 | ||
30 | static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table, | |
31 | unsigned int num_entries, unsigned long ip) | |
32 | { | |
33 | int *first = ip_table; | |
34 | int *last = ip_table + num_entries - 1; | |
35 | int *mid = first, *found = first; | |
36 | ||
37 | if (!num_entries) | |
38 | return NULL; | |
39 | ||
40 | /* | |
41 | * Do a binary range search to find the rightmost duplicate of a given | |
42 | * starting address. Some entries are section terminators which are | |
43 | * "weak" entries for ensuring there are no gaps. They should be | |
44 | * ignored when they conflict with a real entry. | |
45 | */ | |
46 | while (first <= last) { | |
47 | mid = first + ((last - first) / 2); | |
48 | ||
49 | if (orc_ip(mid) <= ip) { | |
50 | found = mid; | |
51 | first = mid + 1; | |
52 | } else | |
53 | last = mid - 1; | |
54 | } | |
55 | ||
56 | return u_table + (found - ip_table); | |
57 | } | |
58 | ||
#ifdef CONFIG_MODULES
/*
 * Look up @ip in the ORC tables of the module that contains it.
 *
 * Returns NULL when no module covers @ip or when that module has no ORC
 * tables attached.
 */
static struct orc_entry *orc_module_find(unsigned long ip)
{
	struct module *owner = __module_address(ip);

	if (!owner)
		return NULL;
	if (!owner->arch.orc_unwind || !owner->arch.orc_unwind_ip)
		return NULL;

	return __orc_find(owner->arch.orc_unwind_ip, owner->arch.orc_unwind,
			  owner->arch.num_orcs, ip);
}
#else
/* Without module support there is never a module entry to find. */
static struct orc_entry *orc_module_find(unsigned long ip)
{
	return NULL;
}
#endif
76 | ||
77 | static struct orc_entry *orc_find(unsigned long ip) | |
78 | { | |
79 | if (!orc_init) | |
80 | return NULL; | |
81 | ||
82 | /* For non-init vmlinux addresses, use the fast lookup table: */ | |
83 | if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) { | |
84 | unsigned int idx, start, stop; | |
85 | ||
86 | idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE; | |
87 | ||
88 | if (unlikely((idx >= lookup_num_blocks-1))) { | |
89 | orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%lx\n", | |
90 | idx, lookup_num_blocks, ip); | |
91 | return NULL; | |
92 | } | |
93 | ||
94 | start = orc_lookup[idx]; | |
95 | stop = orc_lookup[idx + 1] + 1; | |
96 | ||
97 | if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) || | |
98 | (__start_orc_unwind + stop > __stop_orc_unwind))) { | |
99 | orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%lx\n", | |
100 | idx, lookup_num_blocks, start, stop, ip); | |
101 | return NULL; | |
102 | } | |
103 | ||
104 | return __orc_find(__start_orc_unwind_ip + start, | |
105 | __start_orc_unwind + start, stop - start, ip); | |
106 | } | |
107 | ||
108 | /* vmlinux .init slow lookup: */ | |
109 | if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext) | |
110 | return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, | |
111 | __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); | |
112 | ||
113 | /* Module lookup: */ | |
114 | return orc_module_find(ip); | |
115 | } | |
116 | ||
117 | static void orc_sort_swap(void *_a, void *_b, int size) | |
118 | { | |
119 | struct orc_entry *orc_a, *orc_b; | |
120 | struct orc_entry orc_tmp; | |
121 | int *a = _a, *b = _b, tmp; | |
122 | int delta = _b - _a; | |
123 | ||
124 | /* Swap the .orc_unwind_ip entries: */ | |
125 | tmp = *a; | |
126 | *a = *b + delta; | |
127 | *b = tmp - delta; | |
128 | ||
129 | /* Swap the corresponding .orc_unwind entries: */ | |
130 | orc_a = cur_orc_table + (a - cur_orc_ip_table); | |
131 | orc_b = cur_orc_table + (b - cur_orc_ip_table); | |
132 | orc_tmp = *orc_a; | |
133 | *orc_a = *orc_b; | |
134 | *orc_b = orc_tmp; | |
135 | } | |
136 | ||
137 | static int orc_sort_cmp(const void *_a, const void *_b) | |
138 | { | |
139 | struct orc_entry *orc_a; | |
140 | const int *a = _a, *b = _b; | |
141 | unsigned long a_val = orc_ip(a); | |
142 | unsigned long b_val = orc_ip(b); | |
143 | ||
144 | if (a_val > b_val) | |
145 | return 1; | |
146 | if (a_val < b_val) | |
147 | return -1; | |
148 | ||
149 | /* | |
150 | * The "weak" section terminator entries need to always be on the left | |
151 | * to ensure the lookup code skips them in favor of real entries. | |
152 | * These terminator entries exist to handle any gaps created by | |
153 | * whitelisted .o files which didn't get objtool generation. | |
154 | */ | |
155 | orc_a = cur_orc_table + (a - cur_orc_ip_table); | |
156 | return orc_a->sp_reg == ORC_REG_UNDEFINED ? -1 : 1; | |
157 | } | |
158 | ||
#ifdef CONFIG_MODULES
/*
 * Sort a module's ORC tables and attach them to @mod.
 *
 * @_orc_ip / @orc_ip_size: the module's .orc_unwind_ip data and its size
 * @_orc / @orc_size:       the module's .orc_unwind data and its size
 *
 * A one-time warning is raised if the sizes are not whole multiples of the
 * element sizes or the two tables disagree on the entry count; the tables
 * are sorted and attached regardless.
 */
void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size,
			void *_orc, size_t orc_size)
{
	int *ip_table = _orc_ip;
	struct orc_entry *entries = _orc;
	unsigned int count = orc_ip_size / sizeof(int);

	WARN_ON_ONCE(orc_ip_size % sizeof(int) != 0 ||
		     orc_size % sizeof(struct orc_entry) != 0 ||
		     count != orc_size / sizeof(struct orc_entry));

	/*
	 * The sort callbacks find the .orc_unwind entry that belongs to an
	 * .orc_unwind_ip slot through the cur_orc_* globals, so point those
	 * at this module's tables for the duration of the sort.
	 */
	mutex_lock(&sort_mutex);
	cur_orc_ip_table = ip_table;
	cur_orc_table = entries;
	sort(ip_table, count, sizeof(int), orc_sort_cmp, orc_sort_swap);
	mutex_unlock(&sort_mutex);

	mod->arch.orc_unwind_ip = ip_table;
	mod->arch.orc_unwind = entries;
	mod->arch.num_orcs = count;
}
#endif
187 | ||
188 | void __init unwind_init(void) | |
189 | { | |
190 | size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip; | |
191 | size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind; | |
192 | size_t num_entries = orc_ip_size / sizeof(int); | |
193 | struct orc_entry *orc; | |
194 | int i; | |
195 | ||
196 | if (!num_entries || orc_ip_size % sizeof(int) != 0 || | |
197 | orc_size % sizeof(struct orc_entry) != 0 || | |
198 | num_entries != orc_size / sizeof(struct orc_entry)) { | |
199 | orc_warn("WARNING: Bad or missing .orc_unwind table. Disabling unwinder.\n"); | |
200 | return; | |
201 | } | |
202 | ||
203 | /* Sort the .orc_unwind and .orc_unwind_ip tables: */ | |
204 | sort(__start_orc_unwind_ip, num_entries, sizeof(int), orc_sort_cmp, | |
205 | orc_sort_swap); | |
206 | ||
207 | /* Initialize the fast lookup table: */ | |
208 | lookup_num_blocks = orc_lookup_end - orc_lookup; | |
209 | for (i = 0; i < lookup_num_blocks-1; i++) { | |
210 | orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, | |
211 | num_entries, | |
212 | LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i)); | |
213 | if (!orc) { | |
214 | orc_warn("WARNING: Corrupt .orc_unwind table. Disabling unwinder.\n"); | |
215 | return; | |
216 | } | |
217 | ||
218 | orc_lookup[i] = orc - __start_orc_unwind; | |
219 | } | |
220 | ||
221 | /* Initialize the ending block: */ | |
222 | orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries, | |
223 | LOOKUP_STOP_IP); | |
224 | if (!orc) { | |
225 | orc_warn("WARNING: Corrupt .orc_unwind table. Disabling unwinder.\n"); | |
226 | return; | |
227 | } | |
228 | orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind; | |
229 | ||
230 | orc_init = true; | |
231 | } | |
232 | ||
233 | unsigned long unwind_get_return_address(struct unwind_state *state) | |
234 | { | |
235 | if (unwind_done(state)) | |
236 | return 0; | |
237 | ||
238 | return __kernel_text_address(state->ip) ? state->ip : 0; | |
239 | } | |
240 | EXPORT_SYMBOL_GPL(unwind_get_return_address); | |
241 | ||
242 | unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) | |
243 | { | |
244 | if (unwind_done(state)) | |
245 | return NULL; | |
246 | ||
247 | if (state->regs) | |
248 | return &state->regs->ip; | |
249 | ||
250 | if (state->sp) | |
251 | return (unsigned long *)state->sp - 1; | |
252 | ||
253 | return NULL; | |
254 | } | |
255 | ||
256 | static bool stack_access_ok(struct unwind_state *state, unsigned long addr, | |
257 | size_t len) | |
258 | { | |
259 | struct stack_info *info = &state->stack_info; | |
260 | ||
261 | /* | |
262 | * If the address isn't on the current stack, switch to the next one. | |
263 | * | |
264 | * We may have to traverse multiple stacks to deal with the possibility | |
265 | * that info->next_sp could point to an empty stack and the address | |
266 | * could be on a subsequent stack. | |
267 | */ | |
268 | while (!on_stack(info, (void *)addr, len)) | |
269 | if (get_stack_info(info->next_sp, state->task, info, | |
270 | &state->stack_mask)) | |
271 | return false; | |
272 | ||
273 | return true; | |
274 | } | |
275 | ||
276 | static bool deref_stack_reg(struct unwind_state *state, unsigned long addr, | |
277 | unsigned long *val) | |
278 | { | |
279 | if (!stack_access_ok(state, addr, sizeof(long))) | |
280 | return false; | |
281 | ||
282 | *val = READ_ONCE_TASK_STACK(state->task, *(unsigned long *)addr); | |
283 | return true; | |
284 | } | |
285 | ||
286 | #define REGS_SIZE (sizeof(struct pt_regs)) | |
287 | #define SP_OFFSET (offsetof(struct pt_regs, sp)) | |
288 | #define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip)) | |
289 | #define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip)) | |
290 | ||
291 | static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, | |
292 | unsigned long *ip, unsigned long *sp, bool full) | |
293 | { | |
294 | size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE; | |
295 | size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET; | |
296 | struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE); | |
297 | ||
298 | if (IS_ENABLED(CONFIG_X86_64)) { | |
299 | if (!stack_access_ok(state, addr, regs_size)) | |
300 | return false; | |
301 | ||
302 | *ip = regs->ip; | |
303 | *sp = regs->sp; | |
304 | ||
305 | return true; | |
306 | } | |
307 | ||
308 | if (!stack_access_ok(state, addr, sp_offset)) | |
309 | return false; | |
310 | ||
311 | *ip = regs->ip; | |
312 | ||
313 | if (user_mode(regs)) { | |
314 | if (!stack_access_ok(state, addr + sp_offset, | |
315 | REGS_SIZE - SP_OFFSET)) | |
316 | return false; | |
317 | ||
318 | *sp = regs->sp; | |
319 | } else | |
320 | *sp = (unsigned long)®s->sp; | |
321 | ||
322 | return true; | |
323 | } | |
324 | ||
325 | bool unwind_next_frame(struct unwind_state *state) | |
326 | { | |
327 | unsigned long ip_p, sp, orig_ip, prev_sp = state->sp; | |
328 | enum stack_type prev_type = state->stack_info.type; | |
329 | struct orc_entry *orc; | |
330 | struct pt_regs *ptregs; | |
331 | bool indirect = false; | |
332 | ||
333 | if (unwind_done(state)) | |
334 | return false; | |
335 | ||
336 | /* Don't let modules unload while we're reading their ORC data. */ | |
337 | preempt_disable(); | |
338 | ||
339 | /* Have we reached the end? */ | |
340 | if (state->regs && user_mode(state->regs)) | |
341 | goto done; | |
342 | ||
343 | /* | |
344 | * Find the orc_entry associated with the text address. | |
345 | * | |
346 | * Decrement call return addresses by one so they work for sibling | |
347 | * calls and calls to noreturn functions. | |
348 | */ | |
349 | orc = orc_find(state->signal ? state->ip : state->ip - 1); | |
350 | if (!orc || orc->sp_reg == ORC_REG_UNDEFINED) | |
351 | goto done; | |
352 | orig_ip = state->ip; | |
353 | ||
354 | /* Find the previous frame's stack: */ | |
355 | switch (orc->sp_reg) { | |
356 | case ORC_REG_SP: | |
357 | sp = state->sp + orc->sp_offset; | |
358 | break; | |
359 | ||
360 | case ORC_REG_BP: | |
361 | sp = state->bp + orc->sp_offset; | |
362 | break; | |
363 | ||
364 | case ORC_REG_SP_INDIRECT: | |
365 | sp = state->sp + orc->sp_offset; | |
366 | indirect = true; | |
367 | break; | |
368 | ||
369 | case ORC_REG_BP_INDIRECT: | |
370 | sp = state->bp + orc->sp_offset; | |
371 | indirect = true; | |
372 | break; | |
373 | ||
374 | case ORC_REG_R10: | |
375 | if (!state->regs || !state->full_regs) { | |
376 | orc_warn("missing regs for base reg R10 at ip %p\n", | |
377 | (void *)state->ip); | |
378 | goto done; | |
379 | } | |
380 | sp = state->regs->r10; | |
381 | break; | |
382 | ||
383 | case ORC_REG_R13: | |
384 | if (!state->regs || !state->full_regs) { | |
385 | orc_warn("missing regs for base reg R13 at ip %p\n", | |
386 | (void *)state->ip); | |
387 | goto done; | |
388 | } | |
389 | sp = state->regs->r13; | |
390 | break; | |
391 | ||
392 | case ORC_REG_DI: | |
393 | if (!state->regs || !state->full_regs) { | |
394 | orc_warn("missing regs for base reg DI at ip %p\n", | |
395 | (void *)state->ip); | |
396 | goto done; | |
397 | } | |
398 | sp = state->regs->di; | |
399 | break; | |
400 | ||
401 | case ORC_REG_DX: | |
402 | if (!state->regs || !state->full_regs) { | |
403 | orc_warn("missing regs for base reg DX at ip %p\n", | |
404 | (void *)state->ip); | |
405 | goto done; | |
406 | } | |
407 | sp = state->regs->dx; | |
408 | break; | |
409 | ||
410 | default: | |
411 | orc_warn("unknown SP base reg %d for ip %p\n", | |
412 | orc->sp_reg, (void *)state->ip); | |
413 | goto done; | |
414 | } | |
415 | ||
416 | if (indirect) { | |
417 | if (!deref_stack_reg(state, sp, &sp)) | |
418 | goto done; | |
419 | } | |
420 | ||
421 | /* Find IP, SP and possibly regs: */ | |
422 | switch (orc->type) { | |
423 | case ORC_TYPE_CALL: | |
424 | ip_p = sp - sizeof(long); | |
425 | ||
426 | if (!deref_stack_reg(state, ip_p, &state->ip)) | |
427 | goto done; | |
428 | ||
429 | state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, | |
430 | state->ip, (void *)ip_p); | |
431 | ||
432 | state->sp = sp; | |
433 | state->regs = NULL; | |
434 | state->signal = false; | |
435 | break; | |
436 | ||
437 | case ORC_TYPE_REGS: | |
438 | if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) { | |
439 | orc_warn("can't dereference registers at %p for ip %p\n", | |
440 | (void *)sp, (void *)orig_ip); | |
441 | goto done; | |
442 | } | |
443 | ||
444 | state->regs = (struct pt_regs *)sp; | |
445 | state->full_regs = true; | |
446 | state->signal = true; | |
447 | break; | |
448 | ||
449 | case ORC_TYPE_REGS_IRET: | |
450 | if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) { | |
451 | orc_warn("can't dereference iret registers at %p for ip %p\n", | |
452 | (void *)sp, (void *)orig_ip); | |
453 | goto done; | |
454 | } | |
455 | ||
456 | ptregs = container_of((void *)sp, struct pt_regs, ip); | |
457 | if ((unsigned long)ptregs >= prev_sp && | |
458 | on_stack(&state->stack_info, ptregs, REGS_SIZE)) { | |
459 | state->regs = ptregs; | |
460 | state->full_regs = false; | |
461 | } else | |
462 | state->regs = NULL; | |
463 | ||
464 | state->signal = true; | |
465 | break; | |
466 | ||
467 | default: | |
468 | orc_warn("unknown .orc_unwind entry type %d\n", orc->type); | |
469 | break; | |
470 | } | |
471 | ||
472 | /* Find BP: */ | |
473 | switch (orc->bp_reg) { | |
474 | case ORC_REG_UNDEFINED: | |
475 | if (state->regs && state->full_regs) | |
476 | state->bp = state->regs->bp; | |
477 | break; | |
478 | ||
479 | case ORC_REG_PREV_SP: | |
480 | if (!deref_stack_reg(state, sp + orc->bp_offset, &state->bp)) | |
481 | goto done; | |
482 | break; | |
483 | ||
484 | case ORC_REG_BP: | |
485 | if (!deref_stack_reg(state, state->bp + orc->bp_offset, &state->bp)) | |
486 | goto done; | |
487 | break; | |
488 | ||
489 | default: | |
490 | orc_warn("unknown BP base reg %d for ip %p\n", | |
491 | orc->bp_reg, (void *)orig_ip); | |
492 | goto done; | |
493 | } | |
494 | ||
495 | /* Prevent a recursive loop due to bad ORC data: */ | |
496 | if (state->stack_info.type == prev_type && | |
497 | on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) && | |
498 | state->sp <= prev_sp) { | |
499 | orc_warn("stack going in the wrong direction? ip=%p\n", | |
500 | (void *)orig_ip); | |
501 | goto done; | |
502 | } | |
503 | ||
504 | preempt_enable(); | |
505 | return true; | |
506 | ||
507 | done: | |
508 | preempt_enable(); | |
509 | state->stack_info.type = STACK_TYPE_UNKNOWN; | |
510 | return false; | |
511 | } | |
512 | EXPORT_SYMBOL_GPL(unwind_next_frame); | |
513 | ||
514 | void __unwind_start(struct unwind_state *state, struct task_struct *task, | |
515 | struct pt_regs *regs, unsigned long *first_frame) | |
516 | { | |
517 | memset(state, 0, sizeof(*state)); | |
518 | state->task = task; | |
519 | ||
520 | /* | |
521 | * Refuse to unwind the stack of a task while it's executing on another | |
522 | * CPU. This check is racy, but that's ok: the unwinder has other | |
523 | * checks to prevent it from going off the rails. | |
524 | */ | |
525 | if (task_on_another_cpu(task)) | |
526 | goto done; | |
527 | ||
528 | if (regs) { | |
529 | if (user_mode(regs)) | |
530 | goto done; | |
531 | ||
532 | state->ip = regs->ip; | |
533 | state->sp = kernel_stack_pointer(regs); | |
534 | state->bp = regs->bp; | |
535 | state->regs = regs; | |
536 | state->full_regs = true; | |
537 | state->signal = true; | |
538 | ||
539 | } else if (task == current) { | |
540 | asm volatile("lea (%%rip), %0\n\t" | |
541 | "mov %%rsp, %1\n\t" | |
542 | "mov %%rbp, %2\n\t" | |
543 | : "=r" (state->ip), "=r" (state->sp), | |
544 | "=r" (state->bp)); | |
545 | ||
546 | } else { | |
547 | struct inactive_task_frame *frame = (void *)task->thread.sp; | |
548 | ||
549 | state->sp = task->thread.sp; | |
550 | state->bp = READ_ONCE_NOCHECK(frame->bp); | |
551 | state->ip = READ_ONCE_NOCHECK(frame->ret_addr); | |
552 | } | |
553 | ||
554 | if (get_stack_info((unsigned long *)state->sp, state->task, | |
555 | &state->stack_info, &state->stack_mask)) | |
556 | return; | |
557 | ||
558 | /* | |
559 | * The caller can provide the address of the first frame directly | |
560 | * (first_frame) or indirectly (regs->sp) to indicate which stack frame | |
561 | * to start unwinding at. Skip ahead until we reach it. | |
562 | */ | |
563 | ||
564 | /* When starting from regs, skip the regs frame: */ | |
565 | if (regs) { | |
566 | unwind_next_frame(state); | |
567 | return; | |
568 | } | |
569 | ||
570 | /* Otherwise, skip ahead to the user-specified starting frame: */ | |
571 | while (!unwind_done(state) && | |
572 | (!on_stack(&state->stack_info, first_frame, sizeof(long)) || | |
573 | state->sp <= (unsigned long)first_frame)) | |
574 | unwind_next_frame(state); | |
575 | ||
576 | return; | |
577 | ||
578 | done: | |
579 | state->stack_info.type = STACK_TYPE_UNKNOWN; | |
580 | return; | |
581 | } | |
582 | EXPORT_SYMBOL_GPL(__unwind_start); |