]>
Commit | Line | Data |
---|---|---|
bfc077b4 HK |
1 | /* |
2 | * bpf-prologue.c | |
3 | * | |
4 | * Copyright (C) 2015 He Kuang <hekuang@huawei.com> | |
5 | * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> | |
6 | * Copyright (C) 2015 Huawei Inc. | |
7 | */ | |
8 | ||
9 | #include <bpf/libbpf.h> | |
10 | #include "perf.h" | |
11 | #include "debug.h" | |
12 | #include "bpf-loader.h" | |
13 | #include "bpf-prologue.h" | |
14 | #include "probe-finder.h" | |
15 | #include <dwarf-regs.h> | |
16 | #include <linux/filter.h> | |
17 | ||
/* Size in bytes of one eBPF register; also the size of each stack slot used below. */
#define BPF_REG_SIZE	8

/*
 * Placeholder jump offsets. They are emitted into jump instructions while
 * the prologue is generated and replaced with real relative offsets by
 * prologue_relocate().
 */
#define JMP_TO_ERROR_CODE	(-1)
#define JMP_TO_SUCCESS_CODE	(-2)
#define JMP_TO_USER_CODE	(-3)
23 | ||
/*
 * Cursor over the instruction buffer that receives the generated prologue.
 */
struct bpf_insn_pos {
	/* First slot of the output buffer. */
	struct bpf_insn *begin;
	/* One past the last slot of the output buffer. */
	struct bpf_insn *end;
	/* Next slot to write; set to NULL once an append has overflowed. */
	struct bpf_insn *pos;
};
29 | ||
30 | static inline int | |
31 | pos_get_cnt(struct bpf_insn_pos *pos) | |
32 | { | |
33 | return pos->pos - pos->begin; | |
34 | } | |
35 | ||
36 | static int | |
37 | append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos) | |
38 | { | |
39 | if (!pos->pos) | |
40 | return -BPF_LOADER_ERRNO__PROLOGUE2BIG; | |
41 | ||
42 | if (pos->pos + 1 >= pos->end) { | |
43 | pr_err("bpf prologue: prologue too long\n"); | |
44 | pos->pos = NULL; | |
45 | return -BPF_LOADER_ERRNO__PROLOGUE2BIG; | |
46 | } | |
47 | ||
48 | *(pos->pos)++ = new_insn; | |
49 | return 0; | |
50 | } | |
51 | ||
52 | static int | |
53 | check_pos(struct bpf_insn_pos *pos) | |
54 | { | |
55 | if (!pos->pos || pos->pos >= pos->end) | |
56 | return -BPF_LOADER_ERRNO__PROLOGUE2BIG; | |
57 | return 0; | |
58 | } | |
59 | ||
/* Shorthand for append_insn(): emit instruction 'insn' at cursor 'cursor'. */
#define ins(insn, cursor) append_insn((insn), (cursor))
62 | ||
63 | /* | |
64 | * Give a register name (in 'reg'), generate instruction to | |
65 | * load register into an eBPF register rd: | |
66 | * 'ldd target_reg, offset(ctx_reg)', where: | |
67 | * ctx_reg is pre initialized to pointer of 'struct pt_regs'. | |
68 | */ | |
69 | static int | |
70 | gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg, | |
71 | const char *reg, int target_reg) | |
72 | { | |
73 | int offset = regs_query_register_offset(reg); | |
74 | ||
75 | if (offset < 0) { | |
76 | pr_err("bpf: prologue: failed to get register %s\n", | |
77 | reg); | |
78 | return offset; | |
79 | } | |
80 | ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, offset), pos); | |
81 | ||
82 | return check_pos(pos); | |
83 | } | |
84 | ||
85 | /* | |
86 | * Generate a BPF_FUNC_probe_read function call. | |
87 | * | |
88 | * src_base_addr_reg is a register holding base address, | |
89 | * dst_addr_reg is a register holding dest address (on stack), | |
90 | * result is: | |
91 | * | |
92 | * *[dst_addr_reg] = *([src_base_addr_reg] + offset) | |
93 | * | |
94 | * Arguments of BPF_FUNC_probe_read: | |
95 | * ARG1: ptr to stack (dest) | |
96 | * ARG2: size (8) | |
97 | * ARG3: unsafe ptr (src) | |
98 | */ | |
99 | static int | |
100 | gen_read_mem(struct bpf_insn_pos *pos, | |
101 | int src_base_addr_reg, | |
102 | int dst_addr_reg, | |
103 | long offset) | |
104 | { | |
105 | /* mov arg3, src_base_addr_reg */ | |
106 | if (src_base_addr_reg != BPF_REG_ARG3) | |
107 | ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos); | |
108 | /* add arg3, #offset */ | |
109 | if (offset) | |
110 | ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos); | |
111 | ||
112 | /* mov arg2, #reg_size */ | |
113 | ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos); | |
114 | ||
115 | /* mov arg1, dst_addr_reg */ | |
116 | if (dst_addr_reg != BPF_REG_ARG1) | |
117 | ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos); | |
118 | ||
119 | /* Call probe_read */ | |
120 | ins(BPF_EMIT_CALL(BPF_FUNC_probe_read), pos); | |
121 | /* | |
122 | * Error processing: if read fail, goto error code, | |
123 | * will be relocated. Target should be the start of | |
124 | * error processing code. | |
125 | */ | |
126 | ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE), | |
127 | pos); | |
128 | ||
129 | return check_pos(pos); | |
130 | } | |
131 | ||
132 | /* | |
133 | * Each arg should be bare register. Fetch and save them into argument | |
134 | * registers (r3 - r5). | |
135 | * | |
136 | * BPF_REG_1 should have been initialized with pointer to | |
137 | * 'struct pt_regs'. | |
138 | */ | |
139 | static int | |
140 | gen_prologue_fastpath(struct bpf_insn_pos *pos, | |
141 | struct probe_trace_arg *args, int nargs) | |
142 | { | |
143 | int i, err = 0; | |
144 | ||
145 | for (i = 0; i < nargs; i++) { | |
146 | err = gen_ldx_reg_from_ctx(pos, BPF_REG_1, args[i].value, | |
147 | BPF_PROLOGUE_START_ARG_REG + i); | |
148 | if (err) | |
149 | goto errout; | |
150 | } | |
151 | ||
152 | return check_pos(pos); | |
153 | errout: | |
154 | return err; | |
155 | } | |
156 | ||
157 | /* | |
158 | * Slow path: | |
159 | * At least one argument has the form of 'offset($rx)'. | |
160 | * | |
161 | * Following code first stores them into stack, then loads all of then | |
162 | * to r2 - r5. | |
163 | * Before final loading, the final result should be: | |
164 | * | |
165 | * low address | |
166 | * BPF_REG_FP - 24 ARG3 | |
167 | * BPF_REG_FP - 16 ARG2 | |
168 | * BPF_REG_FP - 8 ARG1 | |
169 | * BPF_REG_FP | |
170 | * high address | |
171 | * | |
172 | * For each argument (described as: offn(...off2(off1(reg)))), | |
173 | * generates following code: | |
174 | * | |
175 | * r7 <- fp | |
176 | * r7 <- r7 - stack_offset // Ideal code should initialize r7 using | |
177 | * // fp before generating args. However, | |
178 | * // eBPF won't regard r7 as stack pointer | |
179 | * // if it is generated by minus 8 from | |
180 | * // another stack pointer except fp. | |
181 | * // This is why we have to set r7 | |
182 | * // to fp for each variable. | |
183 | * r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx() | |
184 | * (r7) <- r3 // skip following instructions for bare reg | |
185 | * r3 <- r3 + off1 . // skip if off1 == 0 | |
186 | * r2 <- 8 \ | |
187 | * r1 <- r7 |-> generated by gen_read_mem() | |
188 | * call probe_read / | |
189 | * jnei r0, 0, err ./ | |
190 | * r3 <- (r7) | |
191 | * r3 <- r3 + off2 . // skip if off2 == 0 | |
192 | * r2 <- 8 \ // r2 may be broken by probe_read, so set again | |
193 | * r1 <- r7 |-> generated by gen_read_mem() | |
194 | * call probe_read / | |
195 | * jnei r0, 0, err ./ | |
196 | * ... | |
197 | */ | |
198 | static int | |
199 | gen_prologue_slowpath(struct bpf_insn_pos *pos, | |
200 | struct probe_trace_arg *args, int nargs) | |
201 | { | |
202 | int err, i; | |
203 | ||
204 | for (i = 0; i < nargs; i++) { | |
205 | struct probe_trace_arg *arg = &args[i]; | |
206 | const char *reg = arg->value; | |
207 | struct probe_trace_arg_ref *ref = NULL; | |
208 | int stack_offset = (i + 1) * -8; | |
209 | ||
210 | pr_debug("prologue: fetch arg %d, base reg is %s\n", | |
211 | i, reg); | |
212 | ||
213 | /* value of base register is stored into ARG3 */ | |
214 | err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg, | |
215 | BPF_REG_ARG3); | |
216 | if (err) { | |
217 | pr_err("prologue: failed to get offset of register %s\n", | |
218 | reg); | |
219 | goto errout; | |
220 | } | |
221 | ||
222 | /* Make r7 the stack pointer. */ | |
223 | ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos); | |
224 | /* r7 += -8 */ | |
225 | ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos); | |
226 | /* | |
227 | * Store r3 (base register) onto stack | |
228 | * Ensure fp[offset] is set. | |
229 | * fp is the only valid base register when storing | |
230 | * into stack. We are not allowed to use r7 as base | |
231 | * register here. | |
232 | */ | |
233 | ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3, | |
234 | stack_offset), pos); | |
235 | ||
236 | ref = arg->ref; | |
237 | while (ref) { | |
238 | pr_debug("prologue: arg %d: offset %ld\n", | |
239 | i, ref->offset); | |
240 | err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7, | |
241 | ref->offset); | |
242 | if (err) { | |
243 | pr_err("prologue: failed to generate probe_read function call\n"); | |
244 | goto errout; | |
245 | } | |
246 | ||
247 | ref = ref->next; | |
248 | /* | |
249 | * Load previous result into ARG3. Use | |
250 | * BPF_REG_FP instead of r7 because verifier | |
251 | * allows FP based addressing only. | |
252 | */ | |
253 | if (ref) | |
254 | ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3, | |
255 | BPF_REG_FP, stack_offset), pos); | |
256 | } | |
257 | } | |
258 | ||
259 | /* Final pass: read to registers */ | |
260 | for (i = 0; i < nargs; i++) | |
261 | ins(BPF_LDX_MEM(BPF_DW, BPF_PROLOGUE_START_ARG_REG + i, | |
262 | BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos); | |
263 | ||
264 | ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos); | |
265 | ||
266 | return check_pos(pos); | |
267 | errout: | |
268 | return err; | |
269 | } | |
270 | ||
271 | static int | |
272 | prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code, | |
273 | struct bpf_insn *success_code, struct bpf_insn *user_code) | |
274 | { | |
275 | struct bpf_insn *insn; | |
276 | ||
277 | if (check_pos(pos)) | |
278 | return -BPF_LOADER_ERRNO__PROLOGUE2BIG; | |
279 | ||
280 | for (insn = pos->begin; insn < pos->pos; insn++) { | |
281 | struct bpf_insn *target; | |
282 | u8 class = BPF_CLASS(insn->code); | |
283 | u8 opcode; | |
284 | ||
285 | if (class != BPF_JMP) | |
286 | continue; | |
287 | opcode = BPF_OP(insn->code); | |
288 | if (opcode == BPF_CALL) | |
289 | continue; | |
290 | ||
291 | switch (insn->off) { | |
292 | case JMP_TO_ERROR_CODE: | |
293 | target = error_code; | |
294 | break; | |
295 | case JMP_TO_SUCCESS_CODE: | |
296 | target = success_code; | |
297 | break; | |
298 | case JMP_TO_USER_CODE: | |
299 | target = user_code; | |
300 | break; | |
301 | default: | |
302 | pr_err("bpf prologue: internal error: relocation failed\n"); | |
303 | return -BPF_LOADER_ERRNO__PROLOGUE; | |
304 | } | |
305 | ||
306 | insn->off = target - (insn + 1); | |
307 | } | |
308 | return 0; | |
309 | } | |
310 | ||
311 | int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, | |
312 | struct bpf_insn *new_prog, size_t *new_cnt, | |
313 | size_t cnt_space) | |
314 | { | |
315 | struct bpf_insn *success_code = NULL; | |
316 | struct bpf_insn *error_code = NULL; | |
317 | struct bpf_insn *user_code = NULL; | |
318 | struct bpf_insn_pos pos; | |
319 | bool fastpath = true; | |
320 | int err = 0, i; | |
321 | ||
322 | if (!new_prog || !new_cnt) | |
323 | return -EINVAL; | |
324 | ||
325 | if (cnt_space > BPF_MAXINSNS) | |
326 | cnt_space = BPF_MAXINSNS; | |
327 | ||
328 | pos.begin = new_prog; | |
329 | pos.end = new_prog + cnt_space; | |
330 | pos.pos = new_prog; | |
331 | ||
332 | if (!nargs) { | |
333 | ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), | |
334 | &pos); | |
335 | ||
336 | if (check_pos(&pos)) | |
337 | goto errout; | |
338 | ||
339 | *new_cnt = pos_get_cnt(&pos); | |
340 | return 0; | |
341 | } | |
342 | ||
343 | if (nargs > BPF_PROLOGUE_MAX_ARGS) { | |
344 | pr_warning("bpf: prologue: %d arguments are dropped\n", | |
345 | nargs - BPF_PROLOGUE_MAX_ARGS); | |
346 | nargs = BPF_PROLOGUE_MAX_ARGS; | |
347 | } | |
348 | ||
349 | /* First pass: validation */ | |
350 | for (i = 0; i < nargs; i++) { | |
351 | struct probe_trace_arg_ref *ref = args[i].ref; | |
352 | ||
353 | if (args[i].value[0] == '@') { | |
354 | /* TODO: fetch global variable */ | |
355 | pr_err("bpf: prologue: global %s%+ld not support\n", | |
356 | args[i].value, ref ? ref->offset : 0); | |
357 | return -ENOTSUP; | |
358 | } | |
359 | ||
360 | while (ref) { | |
361 | /* fastpath is true if all args has ref == NULL */ | |
362 | fastpath = false; | |
363 | ||
364 | /* | |
365 | * Instruction encodes immediate value using | |
366 | * s32, ref->offset is long. On systems which | |
367 | * can't fill long in s32, refuse to process if | |
368 | * ref->offset too large (or small). | |
369 | */ | |
370 | #ifdef __LP64__ | |
371 | #define OFFSET_MAX ((1LL << 31) - 1) | |
372 | #define OFFSET_MIN ((1LL << 31) * -1) | |
373 | if (ref->offset > OFFSET_MAX || | |
374 | ref->offset < OFFSET_MIN) { | |
375 | pr_err("bpf: prologue: offset out of bound: %ld\n", | |
376 | ref->offset); | |
377 | return -BPF_LOADER_ERRNO__PROLOGUEOOB; | |
378 | } | |
379 | #endif | |
380 | ref = ref->next; | |
381 | } | |
382 | } | |
383 | pr_debug("prologue: pass validation\n"); | |
384 | ||
385 | if (fastpath) { | |
386 | /* If all variables are registers... */ | |
387 | pr_debug("prologue: fast path\n"); | |
388 | err = gen_prologue_fastpath(&pos, args, nargs); | |
389 | if (err) | |
390 | goto errout; | |
391 | } else { | |
392 | pr_debug("prologue: slow path\n"); | |
393 | ||
394 | /* Initialization: move ctx to a callee saved register. */ | |
395 | ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos); | |
396 | ||
397 | err = gen_prologue_slowpath(&pos, args, nargs); | |
398 | if (err) | |
399 | goto errout; | |
400 | /* | |
401 | * start of ERROR_CODE (only slow pass needs error code) | |
402 | * mov r2 <- 1 // r2 is error number | |
403 | * mov r3 <- 0 // r3, r4... should be touched or | |
404 | * // verifier would complain | |
405 | * mov r4 <- 0 | |
406 | * ... | |
407 | * goto usercode | |
408 | */ | |
409 | error_code = pos.pos; | |
410 | ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1), | |
411 | &pos); | |
412 | ||
413 | for (i = 0; i < nargs; i++) | |
414 | ins(BPF_ALU64_IMM(BPF_MOV, | |
415 | BPF_PROLOGUE_START_ARG_REG + i, | |
416 | 0), | |
417 | &pos); | |
418 | ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE), | |
419 | &pos); | |
420 | } | |
421 | ||
422 | /* | |
423 | * start of SUCCESS_CODE: | |
424 | * mov r2 <- 0 | |
425 | * goto usercode // skip | |
426 | */ | |
427 | success_code = pos.pos; | |
428 | ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos); | |
429 | ||
430 | /* | |
431 | * start of USER_CODE: | |
432 | * Restore ctx to r1 | |
433 | */ | |
434 | user_code = pos.pos; | |
435 | if (!fastpath) { | |
436 | /* | |
437 | * Only slow path needs restoring of ctx. In fast path, | |
438 | * register are loaded directly from r1. | |
439 | */ | |
440 | ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos); | |
441 | err = prologue_relocate(&pos, error_code, success_code, | |
442 | user_code); | |
443 | if (err) | |
444 | goto errout; | |
445 | } | |
446 | ||
447 | err = check_pos(&pos); | |
448 | if (err) | |
449 | goto errout; | |
450 | ||
451 | *new_cnt = pos_get_cnt(&pos); | |
452 | return 0; | |
453 | errout: | |
454 | return err; | |
455 | } |