]>
Commit | Line | Data |
---|---|---|
7c7900f8 | 1 | #include <linux/sched.h> |
29930025 | 2 | #include <linux/sched/task.h> |
68db0cf1 | 3 | #include <linux/sched/task_stack.h> |
7c7900f8 JP |
4 | #include <asm/ptrace.h> |
5 | #include <asm/bitops.h> | |
6 | #include <asm/stacktrace.h> | |
7 | #include <asm/unwind.h> | |
8 | ||
/*
 * Number of bytes occupied by a frame header: two machine words.
 * NOTE(review): presumably the saved frame pointer followed by the return
 * address - confirm against unwind_get_return_address_ptr().
 */
#define FRAME_HEADER_SIZE (2 * sizeof(long))
10 | ||
84936118 JP |
/*
 * Read one word from a task's stack.
 *
 * This disables KASAN checking when reading a value from another task's stack,
 * since the other task could be running on another CPU and could have poisoned
 * the stack in the meantime.  Reads from the current task's own stack keep the
 * normal (checked) READ_ONCE().
 *
 * @task: task whose stack is being read; compared against 'current'
 * @x:    lvalue to read (e.g. a dereferenced stack pointer)
 *
 * Evaluates to the value read, as an unsigned long.
 *
 * 'task' is parenthesized so that any expression may be passed, and the
 * temporary uses a name that will not shadow identifiers appearing in 'x'.
 */
#define READ_ONCE_TASK_STACK(task, x)			\
({							\
	unsigned long __val;				\
	if ((task) == current)				\
		__val = READ_ONCE(x);			\
	else						\
		__val = READ_ONCE_NOCHECK(x);		\
	__val;						\
})
25 | ||
8b5e99f0 JP |
26 | static void unwind_dump(struct unwind_state *state, unsigned long *sp) |
27 | { | |
28 | static bool dumped_before = false; | |
29 | bool prev_zero, zero = false; | |
30 | unsigned long word; | |
31 | ||
32 | if (dumped_before) | |
33 | return; | |
34 | ||
35 | dumped_before = true; | |
36 | ||
37 | printk_deferred("unwind stack type:%d next_sp:%p mask:%lx graph_idx:%d\n", | |
38 | state->stack_info.type, state->stack_info.next_sp, | |
39 | state->stack_mask, state->graph_idx); | |
40 | ||
41 | for (sp = state->orig_sp; sp < state->stack_info.end; sp++) { | |
42 | word = READ_ONCE_NOCHECK(*sp); | |
43 | ||
44 | prev_zero = zero; | |
45 | zero = word == 0; | |
46 | ||
47 | if (zero) { | |
48 | if (!prev_zero) | |
49 | printk_deferred("%p: %016x ...\n", sp, 0); | |
50 | continue; | |
51 | } | |
52 | ||
53 | printk_deferred("%p: %016lx (%pB)\n", sp, word, (void *)word); | |
54 | } | |
55 | } | |
56 | ||
7c7900f8 JP |
57 | unsigned long unwind_get_return_address(struct unwind_state *state) |
58 | { | |
59 | unsigned long addr; | |
60 | unsigned long *addr_p = unwind_get_return_address_ptr(state); | |
61 | ||
62 | if (unwind_done(state)) | |
63 | return 0; | |
64 | ||
946c1911 JP |
65 | if (state->regs && user_mode(state->regs)) |
66 | return 0; | |
67 | ||
84936118 JP |
68 | addr = READ_ONCE_TASK_STACK(state->task, *addr_p); |
69 | addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr, | |
7c7900f8 JP |
70 | addr_p); |
71 | ||
c280f773 | 72 | return __kernel_text_address(addr) ? addr : 0; |
7c7900f8 JP |
73 | } |
74 | EXPORT_SYMBOL_GPL(unwind_get_return_address); | |
75 | ||
24d86f59 JP |
76 | static size_t regs_size(struct pt_regs *regs) |
77 | { | |
78 | /* x86_32 regs from kernel mode are two words shorter: */ | |
79 | if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs)) | |
80 | return sizeof(*regs) - 2*sizeof(long); | |
81 | ||
82 | return sizeof(*regs); | |
83 | } | |
84 | ||
acb4608a JP |
85 | static bool is_last_task_frame(struct unwind_state *state) |
86 | { | |
87 | unsigned long bp = (unsigned long)state->bp; | |
88 | unsigned long regs = (unsigned long)task_pt_regs(state->task); | |
89 | ||
8023e0e2 JP |
90 | /* |
91 | * We have to check for the last task frame at two different locations | |
92 | * because gcc can occasionally decide to realign the stack pointer and | |
93 | * change the offset of the stack frame by a word in the prologue of a | |
94 | * function called by head/entry code. | |
95 | */ | |
96 | return bp == regs - FRAME_HEADER_SIZE || | |
97 | bp == regs - FRAME_HEADER_SIZE - sizeof(long); | |
acb4608a JP |
98 | } |
99 | ||
946c1911 JP |
/*
 * Check whether a frame pointer value is really an encoded pointer to pt_regs
 * on the stack (see ENCODE_FRAME_POINTER).  An encoded pointer has its lowest
 * bit set.
 *
 * Returns the pt_regs pointer with that bit cleared, or NULL when the lowest
 * bit of @bp is clear.
 */
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
	unsigned long encoded = (unsigned long)bp;

	if (encoded & 0x1)
		return (struct pt_regs *)(encoded & ~0x1UL);

	return NULL;
}
113 | ||
7c7900f8 JP |
114 | static bool update_stack_state(struct unwind_state *state, void *addr, |
115 | size_t len) | |
116 | { | |
117 | struct stack_info *info = &state->stack_info; | |
8b5e99f0 | 118 | enum stack_type orig_type = info->type; |
7c7900f8 JP |
119 | |
120 | /* | |
121 | * If addr isn't on the current stack, switch to the next one. | |
122 | * | |
123 | * We may have to traverse multiple stacks to deal with the possibility | |
124 | * that 'info->next_sp' could point to an empty stack and 'addr' could | |
125 | * be on a subsequent stack. | |
126 | */ | |
127 | while (!on_stack(info, addr, len)) | |
128 | if (get_stack_info(info->next_sp, state->task, info, | |
129 | &state->stack_mask)) | |
130 | return false; | |
131 | ||
8b5e99f0 JP |
132 | if (!state->orig_sp || info->type != orig_type) |
133 | state->orig_sp = addr; | |
134 | ||
7c7900f8 JP |
135 | return true; |
136 | } | |
137 | ||
138 | bool unwind_next_frame(struct unwind_state *state) | |
139 | { | |
946c1911 JP |
140 | struct pt_regs *regs; |
141 | unsigned long *next_bp, *next_frame; | |
142 | size_t next_len; | |
24d86f59 | 143 | enum stack_type prev_type = state->stack_info.type; |
7c7900f8 JP |
144 | |
145 | if (unwind_done(state)) | |
146 | return false; | |
147 | ||
946c1911 JP |
148 | /* have we reached the end? */ |
149 | if (state->regs && user_mode(state->regs)) | |
150 | goto the_end; | |
151 | ||
acb4608a JP |
152 | if (is_last_task_frame(state)) { |
153 | regs = task_pt_regs(state->task); | |
154 | ||
155 | /* | |
156 | * kthreads (other than the boot CPU's idle thread) have some | |
157 | * partial regs at the end of their stack which were placed | |
158 | * there by copy_thread_tls(). But the regs don't have any | |
159 | * useful information, so we can skip them. | |
160 | * | |
161 | * This user_mode() check is slightly broader than a PF_KTHREAD | |
162 | * check because it also catches the awkward situation where a | |
163 | * newly forked kthread transitions into a user task by calling | |
164 | * do_execve(), which eventually clears PF_KTHREAD. | |
165 | */ | |
166 | if (!user_mode(regs)) | |
167 | goto the_end; | |
168 | ||
169 | /* | |
170 | * We're almost at the end, but not quite: there's still the | |
171 | * syscall regs frame. Entry code doesn't encode the regs | |
172 | * pointer for syscalls, so we have to set it manually. | |
173 | */ | |
174 | state->regs = regs; | |
175 | state->bp = NULL; | |
176 | return true; | |
177 | } | |
178 | ||
946c1911 JP |
179 | /* get the next frame pointer */ |
180 | if (state->regs) | |
181 | next_bp = (unsigned long *)state->regs->bp; | |
182 | else | |
84936118 | 183 | next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task,*state->bp); |
946c1911 JP |
184 | |
185 | /* is the next frame pointer an encoded pointer to pt_regs? */ | |
186 | regs = decode_frame_pointer(next_bp); | |
187 | if (regs) { | |
188 | next_frame = (unsigned long *)regs; | |
189 | next_len = sizeof(*regs); | |
190 | } else { | |
191 | next_frame = next_bp; | |
192 | next_len = FRAME_HEADER_SIZE; | |
193 | } | |
7c7900f8 JP |
194 | |
195 | /* make sure the next frame's data is accessible */ | |
c32c47c6 JP |
196 | if (!update_stack_state(state, next_frame, next_len)) { |
197 | /* | |
198 | * Don't warn on bad regs->bp. An interrupt in entry code | |
199 | * might cause a false positive warning. | |
200 | */ | |
201 | if (state->regs) | |
202 | goto the_end; | |
203 | ||
204 | goto bad_address; | |
205 | } | |
206 | ||
24d86f59 JP |
207 | /* Make sure it only unwinds up and doesn't overlap the last frame: */ |
208 | if (state->stack_info.type == prev_type) { | |
209 | if (state->regs && (void *)next_frame < (void *)state->regs + regs_size(state->regs)) | |
210 | goto bad_address; | |
211 | ||
212 | if (state->bp && (void *)next_frame < (void *)state->bp + FRAME_HEADER_SIZE) | |
213 | goto bad_address; | |
214 | } | |
215 | ||
7c7900f8 | 216 | /* move to the next frame */ |
946c1911 JP |
217 | if (regs) { |
218 | state->regs = regs; | |
219 | state->bp = NULL; | |
220 | } else { | |
221 | state->bp = next_bp; | |
222 | state->regs = NULL; | |
223 | } | |
224 | ||
7c7900f8 | 225 | return true; |
946c1911 | 226 | |
c32c47c6 | 227 | bad_address: |
900742d8 JP |
228 | /* |
229 | * When unwinding a non-current task, the task might actually be | |
230 | * running on another CPU, in which case it could be modifying its | |
231 | * stack while we're reading it. This is generally not a problem and | |
232 | * can be ignored as long as the caller understands that unwinding | |
233 | * another task will not always succeed. | |
234 | */ | |
235 | if (state->task != current) | |
236 | goto the_end; | |
237 | ||
24d86f59 JP |
238 | if (state->regs) { |
239 | printk_deferred_once(KERN_WARNING | |
240 | "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n", | |
241 | state->regs, state->task->comm, | |
242 | state->task->pid, next_frame); | |
8b5e99f0 | 243 | unwind_dump(state, (unsigned long *)state->regs); |
24d86f59 JP |
244 | } else { |
245 | printk_deferred_once(KERN_WARNING | |
246 | "WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n", | |
247 | state->bp, state->task->comm, | |
248 | state->task->pid, next_frame); | |
8b5e99f0 | 249 | unwind_dump(state, state->bp); |
24d86f59 | 250 | } |
946c1911 JP |
251 | the_end: |
252 | state->stack_info.type = STACK_TYPE_UNKNOWN; | |
253 | return false; | |
7c7900f8 JP |
254 | } |
255 | EXPORT_SYMBOL_GPL(unwind_next_frame); | |
256 | ||
257 | void __unwind_start(struct unwind_state *state, struct task_struct *task, | |
258 | struct pt_regs *regs, unsigned long *first_frame) | |
259 | { | |
946c1911 JP |
260 | unsigned long *bp, *frame; |
261 | size_t len; | |
262 | ||
7c7900f8 JP |
263 | memset(state, 0, sizeof(*state)); |
264 | state->task = task; | |
265 | ||
266 | /* don't even attempt to start from user mode regs */ | |
267 | if (regs && user_mode(regs)) { | |
268 | state->stack_info.type = STACK_TYPE_UNKNOWN; | |
269 | return; | |
270 | } | |
271 | ||
272 | /* set up the starting stack frame */ | |
946c1911 JP |
273 | bp = get_frame_pointer(task, regs); |
274 | regs = decode_frame_pointer(bp); | |
275 | if (regs) { | |
276 | state->regs = regs; | |
277 | frame = (unsigned long *)regs; | |
278 | len = sizeof(*regs); | |
279 | } else { | |
280 | state->bp = bp; | |
281 | frame = bp; | |
282 | len = FRAME_HEADER_SIZE; | |
283 | } | |
7c7900f8 JP |
284 | |
285 | /* initialize stack info and make sure the frame data is accessible */ | |
946c1911 | 286 | get_stack_info(frame, state->task, &state->stack_info, |
7c7900f8 | 287 | &state->stack_mask); |
946c1911 | 288 | update_stack_state(state, frame, len); |
7c7900f8 JP |
289 | |
290 | /* | |
291 | * The caller can provide the address of the first frame directly | |
292 | * (first_frame) or indirectly (regs->sp) to indicate which stack frame | |
293 | * to start unwinding at. Skip ahead until we reach it. | |
294 | */ | |
295 | while (!unwind_done(state) && | |
296 | (!on_stack(&state->stack_info, first_frame, sizeof(long)) || | |
297 | state->bp < first_frame)) | |
298 | unwind_next_frame(state); | |
299 | } | |
300 | EXPORT_SYMBOL_GPL(__unwind_start); |