]>
Commit | Line | Data |
---|---|---|
7c7900f8 | 1 | #include <linux/sched.h> |
29930025 | 2 | #include <linux/sched/task.h> |
7c7900f8 JP |
3 | #include <asm/ptrace.h> |
4 | #include <asm/bitops.h> | |
5 | #include <asm/stacktrace.h> | |
6 | #include <asm/unwind.h> | |
7 | ||
/*
 * Size in bytes of a stack frame header: two machine words.  The first word
 * is the saved frame pointer; the second is presumably the return address.
 */
#define FRAME_HEADER_SIZE (2 * sizeof(long))
9 | ||
84936118 JP |
/*
 * Read one unsigned long from a task's stack.
 *
 * This disables KASAN checking when reading a value from another task's stack,
 * since the other task could be running on another CPU and could have poisoned
 * the stack in the meantime.  Reads of the current task's own stack keep the
 * normal checking.
 *
 * @task: task whose stack is being read; compared against 'current'.
 * @x:    expression to read (callers pass a dereferenced stack pointer).
 *
 * Evaluates to the value read, as an unsigned long.
 */
#define READ_ONCE_TASK_STACK(task, x)			\
({							\
	unsigned long __stack_val;			\
	if ((task) == current)				\
		__stack_val = READ_ONCE(x);		\
	else						\
		__stack_val = READ_ONCE_NOCHECK(x);	\
	__stack_val;					\
})
24 | ||
8b5e99f0 JP |
25 | static void unwind_dump(struct unwind_state *state, unsigned long *sp) |
26 | { | |
27 | static bool dumped_before = false; | |
28 | bool prev_zero, zero = false; | |
29 | unsigned long word; | |
30 | ||
31 | if (dumped_before) | |
32 | return; | |
33 | ||
34 | dumped_before = true; | |
35 | ||
36 | printk_deferred("unwind stack type:%d next_sp:%p mask:%lx graph_idx:%d\n", | |
37 | state->stack_info.type, state->stack_info.next_sp, | |
38 | state->stack_mask, state->graph_idx); | |
39 | ||
40 | for (sp = state->orig_sp; sp < state->stack_info.end; sp++) { | |
41 | word = READ_ONCE_NOCHECK(*sp); | |
42 | ||
43 | prev_zero = zero; | |
44 | zero = word == 0; | |
45 | ||
46 | if (zero) { | |
47 | if (!prev_zero) | |
48 | printk_deferred("%p: %016x ...\n", sp, 0); | |
49 | continue; | |
50 | } | |
51 | ||
52 | printk_deferred("%p: %016lx (%pB)\n", sp, word, (void *)word); | |
53 | } | |
54 | } | |
55 | ||
7c7900f8 JP |
56 | unsigned long unwind_get_return_address(struct unwind_state *state) |
57 | { | |
58 | unsigned long addr; | |
59 | unsigned long *addr_p = unwind_get_return_address_ptr(state); | |
60 | ||
61 | if (unwind_done(state)) | |
62 | return 0; | |
63 | ||
946c1911 JP |
64 | if (state->regs && user_mode(state->regs)) |
65 | return 0; | |
66 | ||
84936118 JP |
67 | addr = READ_ONCE_TASK_STACK(state->task, *addr_p); |
68 | addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr, | |
7c7900f8 JP |
69 | addr_p); |
70 | ||
c280f773 | 71 | return __kernel_text_address(addr) ? addr : 0; |
7c7900f8 JP |
72 | } |
73 | EXPORT_SYMBOL_GPL(unwind_get_return_address); | |
74 | ||
24d86f59 JP |
75 | static size_t regs_size(struct pt_regs *regs) |
76 | { | |
77 | /* x86_32 regs from kernel mode are two words shorter: */ | |
78 | if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs)) | |
79 | return sizeof(*regs) - 2*sizeof(long); | |
80 | ||
81 | return sizeof(*regs); | |
82 | } | |
83 | ||
acb4608a JP |
84 | static bool is_last_task_frame(struct unwind_state *state) |
85 | { | |
86 | unsigned long bp = (unsigned long)state->bp; | |
87 | unsigned long regs = (unsigned long)task_pt_regs(state->task); | |
88 | ||
8023e0e2 JP |
89 | /* |
90 | * We have to check for the last task frame at two different locations | |
91 | * because gcc can occasionally decide to realign the stack pointer and | |
92 | * change the offset of the stack frame by a word in the prologue of a | |
93 | * function called by head/entry code. | |
94 | */ | |
95 | return bp == regs - FRAME_HEADER_SIZE || | |
96 | bp == regs - FRAME_HEADER_SIZE - sizeof(long); | |
acb4608a JP |
97 | } |
98 | ||
946c1911 JP |
/*
 * Check whether a frame pointer value is really an encoded pointer to
 * pt_regs on the stack (see ENCODE_FRAME_POINTER).
 *
 * An encoded pointer has its lowest bit set.  Returns the pointer with that
 * bit cleared, or NULL when @bp is not an encoded pointer.
 */
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
	unsigned long encoded = (unsigned long)bp;

	return (encoded & 0x1) ? (struct pt_regs *)(encoded & ~0x1UL) : NULL;
}
112 | ||
7c7900f8 JP |
113 | static bool update_stack_state(struct unwind_state *state, void *addr, |
114 | size_t len) | |
115 | { | |
116 | struct stack_info *info = &state->stack_info; | |
8b5e99f0 | 117 | enum stack_type orig_type = info->type; |
7c7900f8 JP |
118 | |
119 | /* | |
120 | * If addr isn't on the current stack, switch to the next one. | |
121 | * | |
122 | * We may have to traverse multiple stacks to deal with the possibility | |
123 | * that 'info->next_sp' could point to an empty stack and 'addr' could | |
124 | * be on a subsequent stack. | |
125 | */ | |
126 | while (!on_stack(info, addr, len)) | |
127 | if (get_stack_info(info->next_sp, state->task, info, | |
128 | &state->stack_mask)) | |
129 | return false; | |
130 | ||
8b5e99f0 JP |
131 | if (!state->orig_sp || info->type != orig_type) |
132 | state->orig_sp = addr; | |
133 | ||
7c7900f8 JP |
134 | return true; |
135 | } | |
136 | ||
137 | bool unwind_next_frame(struct unwind_state *state) | |
138 | { | |
946c1911 JP |
139 | struct pt_regs *regs; |
140 | unsigned long *next_bp, *next_frame; | |
141 | size_t next_len; | |
24d86f59 | 142 | enum stack_type prev_type = state->stack_info.type; |
7c7900f8 JP |
143 | |
144 | if (unwind_done(state)) | |
145 | return false; | |
146 | ||
946c1911 JP |
147 | /* have we reached the end? */ |
148 | if (state->regs && user_mode(state->regs)) | |
149 | goto the_end; | |
150 | ||
acb4608a JP |
151 | if (is_last_task_frame(state)) { |
152 | regs = task_pt_regs(state->task); | |
153 | ||
154 | /* | |
155 | * kthreads (other than the boot CPU's idle thread) have some | |
156 | * partial regs at the end of their stack which were placed | |
157 | * there by copy_thread_tls(). But the regs don't have any | |
158 | * useful information, so we can skip them. | |
159 | * | |
160 | * This user_mode() check is slightly broader than a PF_KTHREAD | |
161 | * check because it also catches the awkward situation where a | |
162 | * newly forked kthread transitions into a user task by calling | |
163 | * do_execve(), which eventually clears PF_KTHREAD. | |
164 | */ | |
165 | if (!user_mode(regs)) | |
166 | goto the_end; | |
167 | ||
168 | /* | |
169 | * We're almost at the end, but not quite: there's still the | |
170 | * syscall regs frame. Entry code doesn't encode the regs | |
171 | * pointer for syscalls, so we have to set it manually. | |
172 | */ | |
173 | state->regs = regs; | |
174 | state->bp = NULL; | |
175 | return true; | |
176 | } | |
177 | ||
946c1911 JP |
178 | /* get the next frame pointer */ |
179 | if (state->regs) | |
180 | next_bp = (unsigned long *)state->regs->bp; | |
181 | else | |
84936118 | 182 | next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task,*state->bp); |
946c1911 JP |
183 | |
184 | /* is the next frame pointer an encoded pointer to pt_regs? */ | |
185 | regs = decode_frame_pointer(next_bp); | |
186 | if (regs) { | |
187 | next_frame = (unsigned long *)regs; | |
188 | next_len = sizeof(*regs); | |
189 | } else { | |
190 | next_frame = next_bp; | |
191 | next_len = FRAME_HEADER_SIZE; | |
192 | } | |
7c7900f8 JP |
193 | |
194 | /* make sure the next frame's data is accessible */ | |
c32c47c6 JP |
195 | if (!update_stack_state(state, next_frame, next_len)) { |
196 | /* | |
197 | * Don't warn on bad regs->bp. An interrupt in entry code | |
198 | * might cause a false positive warning. | |
199 | */ | |
200 | if (state->regs) | |
201 | goto the_end; | |
202 | ||
203 | goto bad_address; | |
204 | } | |
205 | ||
24d86f59 JP |
206 | /* Make sure it only unwinds up and doesn't overlap the last frame: */ |
207 | if (state->stack_info.type == prev_type) { | |
208 | if (state->regs && (void *)next_frame < (void *)state->regs + regs_size(state->regs)) | |
209 | goto bad_address; | |
210 | ||
211 | if (state->bp && (void *)next_frame < (void *)state->bp + FRAME_HEADER_SIZE) | |
212 | goto bad_address; | |
213 | } | |
214 | ||
7c7900f8 | 215 | /* move to the next frame */ |
946c1911 JP |
216 | if (regs) { |
217 | state->regs = regs; | |
218 | state->bp = NULL; | |
219 | } else { | |
220 | state->bp = next_bp; | |
221 | state->regs = NULL; | |
222 | } | |
223 | ||
7c7900f8 | 224 | return true; |
946c1911 | 225 | |
c32c47c6 | 226 | bad_address: |
900742d8 JP |
227 | /* |
228 | * When unwinding a non-current task, the task might actually be | |
229 | * running on another CPU, in which case it could be modifying its | |
230 | * stack while we're reading it. This is generally not a problem and | |
231 | * can be ignored as long as the caller understands that unwinding | |
232 | * another task will not always succeed. | |
233 | */ | |
234 | if (state->task != current) | |
235 | goto the_end; | |
236 | ||
24d86f59 JP |
237 | if (state->regs) { |
238 | printk_deferred_once(KERN_WARNING | |
239 | "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n", | |
240 | state->regs, state->task->comm, | |
241 | state->task->pid, next_frame); | |
8b5e99f0 | 242 | unwind_dump(state, (unsigned long *)state->regs); |
24d86f59 JP |
243 | } else { |
244 | printk_deferred_once(KERN_WARNING | |
245 | "WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n", | |
246 | state->bp, state->task->comm, | |
247 | state->task->pid, next_frame); | |
8b5e99f0 | 248 | unwind_dump(state, state->bp); |
24d86f59 | 249 | } |
946c1911 JP |
250 | the_end: |
251 | state->stack_info.type = STACK_TYPE_UNKNOWN; | |
252 | return false; | |
7c7900f8 JP |
253 | } |
254 | EXPORT_SYMBOL_GPL(unwind_next_frame); | |
255 | ||
256 | void __unwind_start(struct unwind_state *state, struct task_struct *task, | |
257 | struct pt_regs *regs, unsigned long *first_frame) | |
258 | { | |
946c1911 JP |
259 | unsigned long *bp, *frame; |
260 | size_t len; | |
261 | ||
7c7900f8 JP |
262 | memset(state, 0, sizeof(*state)); |
263 | state->task = task; | |
264 | ||
265 | /* don't even attempt to start from user mode regs */ | |
266 | if (regs && user_mode(regs)) { | |
267 | state->stack_info.type = STACK_TYPE_UNKNOWN; | |
268 | return; | |
269 | } | |
270 | ||
271 | /* set up the starting stack frame */ | |
946c1911 JP |
272 | bp = get_frame_pointer(task, regs); |
273 | regs = decode_frame_pointer(bp); | |
274 | if (regs) { | |
275 | state->regs = regs; | |
276 | frame = (unsigned long *)regs; | |
277 | len = sizeof(*regs); | |
278 | } else { | |
279 | state->bp = bp; | |
280 | frame = bp; | |
281 | len = FRAME_HEADER_SIZE; | |
282 | } | |
7c7900f8 JP |
283 | |
284 | /* initialize stack info and make sure the frame data is accessible */ | |
946c1911 | 285 | get_stack_info(frame, state->task, &state->stack_info, |
7c7900f8 | 286 | &state->stack_mask); |
946c1911 | 287 | update_stack_state(state, frame, len); |
7c7900f8 JP |
288 | |
289 | /* | |
290 | * The caller can provide the address of the first frame directly | |
291 | * (first_frame) or indirectly (regs->sp) to indicate which stack frame | |
292 | * to start unwinding at. Skip ahead until we reach it. | |
293 | */ | |
294 | while (!unwind_done(state) && | |
295 | (!on_stack(&state->stack_info, first_frame, sizeof(long)) || | |
296 | state->bp < first_frame)) | |
297 | unwind_next_frame(state); | |
298 | } | |
299 | EXPORT_SYMBOL_GPL(__unwind_start); |