]>
Commit | Line | Data |
---|---|---|
7c7900f8 | 1 | #include <linux/sched.h> |
29930025 | 2 | #include <linux/sched/task.h> |
68db0cf1 | 3 | #include <linux/sched/task_stack.h> |
7c7900f8 JP |
4 | #include <asm/ptrace.h> |
5 | #include <asm/bitops.h> | |
6 | #include <asm/stacktrace.h> | |
7 | #include <asm/unwind.h> | |
8 | ||
9 | #define FRAME_HEADER_SIZE (sizeof(long) * 2) | |
10 | ||
84936118 JP |
/*
 * Read one word, 'x', that lives on the stack of 'task'.
 *
 * This disables KASAN checking when reading a value from another task's stack,
 * since the other task could be running on another CPU and could have poisoned
 * the stack in the meantime.  Reads from the current task's own stack still go
 * through the regular READ_ONCE().
 *
 * 'task' is parenthesized so that any pointer-valued expression can be passed
 * safely.  The statement expression evaluates to an unsigned long.
 */
#define READ_ONCE_TASK_STACK(task, x)			\
({							\
	unsigned long val;				\
	if ((task) == current)				\
		val = READ_ONCE(x);			\
	else						\
		val = READ_ONCE_NOCHECK(x);		\
	val;						\
})
25 | ||
8b5e99f0 JP |
26 | static void unwind_dump(struct unwind_state *state, unsigned long *sp) |
27 | { | |
28 | static bool dumped_before = false; | |
29 | bool prev_zero, zero = false; | |
30 | unsigned long word; | |
31 | ||
32 | if (dumped_before) | |
33 | return; | |
34 | ||
35 | dumped_before = true; | |
36 | ||
37 | printk_deferred("unwind stack type:%d next_sp:%p mask:%lx graph_idx:%d\n", | |
38 | state->stack_info.type, state->stack_info.next_sp, | |
39 | state->stack_mask, state->graph_idx); | |
40 | ||
41 | for (sp = state->orig_sp; sp < state->stack_info.end; sp++) { | |
42 | word = READ_ONCE_NOCHECK(*sp); | |
43 | ||
44 | prev_zero = zero; | |
45 | zero = word == 0; | |
46 | ||
47 | if (zero) { | |
48 | if (!prev_zero) | |
49 | printk_deferred("%p: %016x ...\n", sp, 0); | |
50 | continue; | |
51 | } | |
52 | ||
53 | printk_deferred("%p: %016lx (%pB)\n", sp, word, (void *)word); | |
54 | } | |
55 | } | |
56 | ||
7c7900f8 JP |
57 | unsigned long unwind_get_return_address(struct unwind_state *state) |
58 | { | |
59 | unsigned long addr; | |
60 | unsigned long *addr_p = unwind_get_return_address_ptr(state); | |
61 | ||
62 | if (unwind_done(state)) | |
63 | return 0; | |
64 | ||
946c1911 JP |
65 | if (state->regs && user_mode(state->regs)) |
66 | return 0; | |
67 | ||
84936118 JP |
68 | addr = READ_ONCE_TASK_STACK(state->task, *addr_p); |
69 | addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr, | |
7c7900f8 JP |
70 | addr_p); |
71 | ||
c280f773 | 72 | return __kernel_text_address(addr) ? addr : 0; |
7c7900f8 JP |
73 | } |
74 | EXPORT_SYMBOL_GPL(unwind_get_return_address); | |
75 | ||
24d86f59 JP |
76 | static size_t regs_size(struct pt_regs *regs) |
77 | { | |
78 | /* x86_32 regs from kernel mode are two words shorter: */ | |
79 | if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs)) | |
80 | return sizeof(*regs) - 2*sizeof(long); | |
81 | ||
82 | return sizeof(*regs); | |
83 | } | |
84 | ||
87a6b297 JP |
85 | #ifdef CONFIG_X86_32 |
86 | #define GCC_REALIGN_WORDS 3 | |
87 | #else | |
88 | #define GCC_REALIGN_WORDS 1 | |
89 | #endif | |
90 | ||
acb4608a JP |
91 | static bool is_last_task_frame(struct unwind_state *state) |
92 | { | |
87a6b297 JP |
93 | unsigned long *last_bp = (unsigned long *)task_pt_regs(state->task) - 2; |
94 | unsigned long *aligned_bp = last_bp - GCC_REALIGN_WORDS; | |
acb4608a | 95 | |
8023e0e2 JP |
96 | /* |
97 | * We have to check for the last task frame at two different locations | |
98 | * because gcc can occasionally decide to realign the stack pointer and | |
87a6b297 JP |
99 | * change the offset of the stack frame in the prologue of a function |
100 | * called by head/entry code. Examples: | |
101 | * | |
102 | * <start_secondary>: | |
103 | * push %edi | |
104 | * lea 0x8(%esp),%edi | |
105 | * and $0xfffffff8,%esp | |
106 | * pushl -0x4(%edi) | |
107 | * push %ebp | |
108 | * mov %esp,%ebp | |
109 | * | |
110 | * <x86_64_start_kernel>: | |
111 | * lea 0x8(%rsp),%r10 | |
112 | * and $0xfffffffffffffff0,%rsp | |
113 | * pushq -0x8(%r10) | |
114 | * push %rbp | |
115 | * mov %rsp,%rbp | |
116 | * | |
117 | * Note that after aligning the stack, it pushes a duplicate copy of | |
118 | * the return address before pushing the frame pointer. | |
8023e0e2 | 119 | */ |
87a6b297 JP |
120 | return (state->bp == last_bp || |
121 | (state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1))); | |
acb4608a JP |
122 | } |
123 | ||
946c1911 JP |
/*
 * Check whether a frame pointer value is really an encoded pointer to pt_regs
 * on the stack (see ENCODE_FRAME_POINTER).
 *
 * An encoded pointer has its lowest bit set.  Returns the pt_regs pointer with
 * that bit cleared, or NULL if @bp is not encoded.
 */
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
	const unsigned long tag = 0x1;
	unsigned long raw = (unsigned long)bp;

	return (raw & tag) ? (struct pt_regs *)(raw & ~tag) : NULL;
}
137 | ||
7c7900f8 JP |
138 | static bool update_stack_state(struct unwind_state *state, void *addr, |
139 | size_t len) | |
140 | { | |
141 | struct stack_info *info = &state->stack_info; | |
8b5e99f0 | 142 | enum stack_type orig_type = info->type; |
7c7900f8 JP |
143 | |
144 | /* | |
145 | * If addr isn't on the current stack, switch to the next one. | |
146 | * | |
147 | * We may have to traverse multiple stacks to deal with the possibility | |
148 | * that 'info->next_sp' could point to an empty stack and 'addr' could | |
149 | * be on a subsequent stack. | |
150 | */ | |
151 | while (!on_stack(info, addr, len)) | |
152 | if (get_stack_info(info->next_sp, state->task, info, | |
153 | &state->stack_mask)) | |
154 | return false; | |
155 | ||
8b5e99f0 JP |
156 | if (!state->orig_sp || info->type != orig_type) |
157 | state->orig_sp = addr; | |
158 | ||
7c7900f8 JP |
159 | return true; |
160 | } | |
161 | ||
/*
 * Advance @state to the next frame on the stack.
 *
 * Returns true when @state now describes another frame: either a regular
 * frame (state->bp set, state->regs NULL) or a pt_regs frame (state->regs
 * set, state->bp NULL).
 *
 * Returns false when the unwind is over.  Except for the initial
 * unwind_done() check, every false return goes through 'the_end', which
 * sets state->stack_info.type to STACK_TYPE_UNKNOWN.  A bad frame pointer is
 * reported with a one-time warning and a stack dump, but only when the
 * current task's own stack is being unwound.
 */
bool unwind_next_frame(struct unwind_state *state)
{
	struct pt_regs *regs;
	unsigned long *next_bp, *next_frame;
	size_t next_len;
	/* Remembered so a stack switch can be detected further down. */
	enum stack_type prev_type = state->stack_info.type;

	if (unwind_done(state))
		return false;

	/* have we reached the end? */
	if (state->regs && user_mode(state->regs))
		goto the_end;

	if (is_last_task_frame(state)) {
		regs = task_pt_regs(state->task);

		/*
		 * kthreads (other than the boot CPU's idle thread) have some
		 * partial regs at the end of their stack which were placed
		 * there by copy_thread_tls().  But the regs don't have any
		 * useful information, so we can skip them.
		 *
		 * This user_mode() check is slightly broader than a PF_KTHREAD
		 * check because it also catches the awkward situation where a
		 * newly forked kthread transitions into a user task by calling
		 * do_execve(), which eventually clears PF_KTHREAD.
		 */
		if (!user_mode(regs))
			goto the_end;

		/*
		 * We're almost at the end, but not quite: there's still the
		 * syscall regs frame.  Entry code doesn't encode the regs
		 * pointer for syscalls, so we have to set it manually.
		 */
		state->regs = regs;
		state->bp = NULL;
		return true;
	}

	/* get the next frame pointer */
	if (state->regs)
		next_bp = (unsigned long *)state->regs->bp;
	else
		next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task,*state->bp);

	/* is the next frame pointer an encoded pointer to pt_regs? */
	regs = decode_frame_pointer(next_bp);
	if (regs) {
		next_frame = (unsigned long *)regs;
		next_len = sizeof(*regs);
	} else {
		next_frame = next_bp;
		next_len = FRAME_HEADER_SIZE;
	}

	/* make sure the next frame's data is accessible */
	if (!update_stack_state(state, next_frame, next_len)) {
		/*
		 * Don't warn on bad regs->bp.  An interrupt in entry code
		 * might cause a false positive warning.
		 */
		if (state->regs)
			goto the_end;

		goto bad_address;
	}

	/* Make sure it only unwinds up and doesn't overlap the last frame: */
	if (state->stack_info.type == prev_type) {
		if (state->regs && (void *)next_frame < (void *)state->regs + regs_size(state->regs))
			goto bad_address;

		if (state->bp && (void *)next_frame < (void *)state->bp + FRAME_HEADER_SIZE)
			goto bad_address;
	}

	/* move to the next frame */
	if (regs) {
		state->regs = regs;
		state->bp = NULL;
	} else {
		state->bp = next_bp;
		state->regs = NULL;
	}

	return true;

bad_address:
	/*
	 * When unwinding a non-current task, the task might actually be
	 * running on another CPU, in which case it could be modifying its
	 * stack while we're reading it.  This is generally not a problem and
	 * can be ignored as long as the caller understands that unwinding
	 * another task will not always succeed.
	 */
	if (state->task != current)
		goto the_end;

	if (state->regs) {
		printk_deferred_once(KERN_WARNING
			"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
			state->regs, state->task->comm,
			state->task->pid, next_frame);
		unwind_dump(state, (unsigned long *)state->regs);
	} else {
		printk_deferred_once(KERN_WARNING
			"WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n",
			state->bp, state->task->comm,
			state->task->pid, next_frame);
		unwind_dump(state, state->bp);
	}
	/* The warning path deliberately falls through to end the unwind. */
the_end:
	state->stack_info.type = STACK_TYPE_UNKNOWN;
	return false;
}
EXPORT_SYMBOL_GPL(unwind_next_frame);
280 | ||
281 | void __unwind_start(struct unwind_state *state, struct task_struct *task, | |
282 | struct pt_regs *regs, unsigned long *first_frame) | |
283 | { | |
946c1911 JP |
284 | unsigned long *bp, *frame; |
285 | size_t len; | |
286 | ||
7c7900f8 JP |
287 | memset(state, 0, sizeof(*state)); |
288 | state->task = task; | |
289 | ||
290 | /* don't even attempt to start from user mode regs */ | |
291 | if (regs && user_mode(regs)) { | |
292 | state->stack_info.type = STACK_TYPE_UNKNOWN; | |
293 | return; | |
294 | } | |
295 | ||
296 | /* set up the starting stack frame */ | |
946c1911 JP |
297 | bp = get_frame_pointer(task, regs); |
298 | regs = decode_frame_pointer(bp); | |
299 | if (regs) { | |
300 | state->regs = regs; | |
301 | frame = (unsigned long *)regs; | |
302 | len = sizeof(*regs); | |
303 | } else { | |
304 | state->bp = bp; | |
305 | frame = bp; | |
306 | len = FRAME_HEADER_SIZE; | |
307 | } | |
7c7900f8 JP |
308 | |
309 | /* initialize stack info and make sure the frame data is accessible */ | |
946c1911 | 310 | get_stack_info(frame, state->task, &state->stack_info, |
7c7900f8 | 311 | &state->stack_mask); |
946c1911 | 312 | update_stack_state(state, frame, len); |
7c7900f8 JP |
313 | |
314 | /* | |
315 | * The caller can provide the address of the first frame directly | |
316 | * (first_frame) or indirectly (regs->sp) to indicate which stack frame | |
317 | * to start unwinding at. Skip ahead until we reach it. | |
318 | */ | |
319 | while (!unwind_done(state) && | |
320 | (!on_stack(&state->stack_info, first_frame, sizeof(long)) || | |
321 | state->bp < first_frame)) | |
322 | unwind_next_frame(state); | |
323 | } | |
324 | EXPORT_SYMBOL_GPL(__unwind_start); |