]>
Commit | Line | Data |
---|---|---|
1d129d19 JP |
1 | /* |
2 | * tc_bpf.c BPF common code | |
3 | * | |
4 | * This program is free software; you can distribute it and/or | |
5 | * modify it under the terms of the GNU General Public License | |
6 | * as published by the Free Software Foundation; either version | |
7 | * 2 of the License, or (at your option) any later version. | |
8 | * | |
9 | * Authors: Daniel Borkmann <dborkman@redhat.com> | |
10 | * Jiri Pirko <jiri@resnulli.us> | |
11c39b5e | 11 | * Alexei Starovoitov <ast@plumgrid.com> |
1d129d19 JP |
12 | */ |
13 | ||
14 | #include <stdio.h> | |
15 | #include <stdlib.h> | |
16 | #include <unistd.h> | |
17 | #include <string.h> | |
18 | #include <stdbool.h> | |
473d7840 | 19 | #include <stdint.h> |
1d129d19 | 20 | #include <errno.h> |
11c39b5e DB |
21 | #include <fcntl.h> |
22 | #include <stdarg.h> | |
5c5a0f3d | 23 | #include <limits.h> |
1d129d19 | 24 | |
11c39b5e DB |
25 | #ifdef HAVE_ELF |
26 | #include <libelf.h> | |
27 | #include <gelf.h> | |
28 | #endif | |
29 | ||
32e93fb7 DB |
30 | #include <sys/types.h> |
31 | #include <sys/stat.h> | |
32 | #include <sys/un.h> | |
33 | #include <sys/vfs.h> | |
34 | #include <sys/mount.h> | |
35 | #include <sys/syscall.h> | |
36 | #include <sys/sendfile.h> | |
37 | #include <sys/resource.h> | |
38 | ||
39 | #include <linux/bpf.h> | |
40 | #include <linux/filter.h> | |
41 | #include <linux/if_alg.h> | |
42 | ||
8187b012 DB |
43 | #include <arpa/inet.h> |
44 | ||
1d129d19 | 45 | #include "utils.h" |
6256f8c9 DB |
46 | |
47 | #include "bpf_elf.h" | |
48 | #include "bpf_scm.h" | |
49 | ||
1d129d19 JP |
50 | #include "tc_util.h" |
51 | #include "tc_bpf.h" | |
52 | ||
67584e3a ND |
53 | #ifndef AF_ALG |
54 | #define AF_ALG 38 | |
55 | #endif | |
56 | ||
e77fa41d DB |
57 | #ifndef EM_BPF |
58 | #define EM_BPF 247 | |
59 | #endif | |
60 | ||
32e93fb7 DB |
#ifdef HAVE_ELF
/* With ELF support the loader is defined elsewhere in this translation unit. */
static int bpf_obj_open(const char *path, enum bpf_prog_type type,
			const char *sec, bool verbose);
#else
/*
 * Fallback used when the tool is built without libelf: object files cannot
 * be loaded, so report that and fail with errno set to ENOSYS.
 *
 * Returns -1 unconditionally; all arguments are ignored.
 */
static int bpf_obj_open(const char *path, enum bpf_prog_type type,
			const char *sec, bool verbose)
{
	fprintf(stderr, "No ELF library support compiled in.\n");
	errno = ENOSYS;
	return -1;
}
#endif
73 | ||
/*
 * Convert a user-space pointer into the __u64 representation that the
 * pointer-carrying fields of union bpf_attr are assigned in this file.
 */
static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
	/* Cast via unsigned long first so the widening to 64 bit is unsigned. */
	return (__u64)(unsigned long)ptr;
}
78 | ||
/*
 * Thin wrapper around the bpf(2) system call.
 *
 * @cmd:  BPF_* command number
 * @attr: command argument block
 * @size: size of @attr in bytes
 *
 * Returns whatever syscall() returns. If the build headers do not define
 * __NR_bpf, prints a diagnostic, sets errno to ENOSYS and returns -1.
 */
static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
#ifdef __NR_bpf
	return syscall(__NR_bpf, cmd, attr, size);
#else
	fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
	errno = ENOSYS;
	return -1;
#endif
}
89 | ||
91d88eeb DB |
/*
 * Issue BPF_MAP_UPDATE_ELEM on map @fd, storing @value under @key.
 *
 * @flags is passed through unchanged in attr.flags.
 * Returns the result of the bpf() wrapper.
 */
static int bpf_map_update(int fd, const void *key, const void *value,
			  uint64_t flags)
{
	union bpf_attr attr;

	/* Clear the whole union so every field not set below is zero. */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = bpf_ptr_to_u64(key);
	attr.value = bpf_ptr_to_u64(value);
	attr.flags = flags;

	return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}
103 | ||
/*
 * Obtain the textual cBPF program and parse its leading length field.
 *
 * @arg:          the program text itself, or a file name if @from_file
 * @from_file:    read the first line of file @arg instead of using @arg
 * @bpf_len:      out: instruction count parsed from the start of the text
 * @bpf_string:   out: the program text (heap buffer when read from a file)
 * @need_release: out: true when the caller must free() *@bpf_string
 * @separator:    character that must directly follow the length field
 *
 * Returns 0 on success, -ENOMEM, -ENOENT or -EIO when the file cannot be
 * buffered, opened or read, and -EINVAL when the text does not start with
 * "<length><separator>". On any failure nothing is left for the caller to
 * free.
 */
static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
			    char **bpf_string, bool *need_release,
			    const char separator)
{
	char sp;

	if (from_file) {
		size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
		char *tmp_string;
		FILE *fp;

		/* Worst case: length prefix plus BPF_MAXINSNS maximal ops. */
		tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
		tmp_string = calloc(1, tmp_len);
		if (tmp_string == NULL)
			return -ENOMEM;

		fp = fopen(arg, "r");
		if (fp == NULL) {
			perror("Cannot fopen");
			free(tmp_string);
			return -ENOENT;
		}

		if (!fgets(tmp_string, tmp_len, fp)) {
			free(tmp_string);
			fclose(fp);
			return -EIO;
		}

		fclose(fp);

		*need_release = true;
		*bpf_string = tmp_string;
	} else {
		*need_release = false;
		*bpf_string = arg;
	}

	if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 ||
	    sp != separator) {
		if (*need_release)
			free(*bpf_string);
		/* Do not hand a freed (or rejected) buffer back to the caller. */
		*bpf_string = NULL;
		*need_release = false;
		return -EINVAL;
	}

	return 0;
}
153 | ||
32e93fb7 DB |
/*
 * Parse a textual cBPF program ("<len>,<code> <jt> <jf> <k>,...") into
 * @bpf_ops.
 *
 * @argc/@argv: remaining command line; argv[0] holds the program text or,
 *              when @from_file is set, the name of the file containing it
 * @bpf_ops:    destination array; at most BPF_MAXINSNS entries are written
 *
 * Returns the number of instructions on success or -EINVAL on any error
 * (missing argument, bad length prefix, malformed instruction, or a
 * mismatch between the encoded length and the number of instructions).
 */
static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
			 bool from_file)
{
	char *bpf_string, *token, separator = ',';
	int ret = 0, i = 0;
	bool need_release;
	__u16 bpf_len = 0;

	if (argc < 1)
		return -EINVAL;
	if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
			     &need_release, separator))
		return -EINVAL;
	if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) {
		ret = -EINVAL;
		goto out;
	}

	/* Each instruction starts right after a separator character. */
	token = bpf_string;
	while ((token = strchr(token, separator)) && (++token)[0]) {
		if (i >= bpf_len) {
			fprintf(stderr, "Real program length exceeds encoded length parameter!\n");
			ret = -EINVAL;
			goto out;
		}

		if (sscanf(token, "%hu %hhu %hhu %u,",
			   &bpf_ops[i].code, &bpf_ops[i].jt,
			   &bpf_ops[i].jf, &bpf_ops[i].k) != 4) {
			fprintf(stderr, "Error at instruction %d!\n", i);
			ret = -EINVAL;
			goto out;
		}

		i++;
	}

	if (i != bpf_len) {
		fprintf(stderr, "Parsed program length is less than encoded length parameter!\n");
		ret = -EINVAL;
		goto out;
	}
	ret = bpf_len;
out:
	if (need_release)
		free(bpf_string);

	return ret;
}
203 | ||
/*
 * Print @len cBPF instructions carried in the payload of attribute
 * @bpf_ops to @f, in the form: bytecode '<len>,<code> <jt> <jf> <k>,...'
 *
 * Nothing is printed when @len is 0.
 */
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
{
	struct sock_filter *ops = (struct sock_filter *) RTA_DATA(bpf_ops);
	int i;

	if (len == 0)
		return;

	fprintf(f, "bytecode \'%u,", len);

	/* All but the last instruction are followed by a comma. */
	for (i = 0; i < len - 1; i++)
		fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt,
			ops[i].jf, ops[i].k);

	/* The last instruction closes the quote instead. */
	fprintf(f, "%hu %hhu %hhu %u\'", ops[i].code, ops[i].jt,
		ops[i].jf, ops[i].k);
}
11c39b5e | 221 | |
afc1a200 DB |
222 | static void bpf_map_pin_report(const struct bpf_elf_map *pin, |
223 | const struct bpf_elf_map *obj) | |
224 | { | |
225 | fprintf(stderr, "Map specification differs from pinned file!\n"); | |
226 | ||
227 | if (obj->type != pin->type) | |
228 | fprintf(stderr, " - Type: %u (obj) != %u (pin)\n", | |
229 | obj->type, pin->type); | |
230 | if (obj->size_key != pin->size_key) | |
231 | fprintf(stderr, " - Size key: %u (obj) != %u (pin)\n", | |
232 | obj->size_key, pin->size_key); | |
233 | if (obj->size_value != pin->size_value) | |
234 | fprintf(stderr, " - Size value: %u (obj) != %u (pin)\n", | |
235 | obj->size_value, pin->size_value); | |
236 | if (obj->max_elem != pin->max_elem) | |
237 | fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n", | |
238 | obj->max_elem, pin->max_elem); | |
4dd3f50a DB |
239 | if (obj->flags != pin->flags) |
240 | fprintf(stderr, " - Flags: %#x (obj) != %#x (pin)\n", | |
241 | obj->flags, pin->flags); | |
afc1a200 DB |
242 | |
243 | fprintf(stderr, "\n"); | |
244 | } | |
245 | ||
91d88eeb DB |
246 | static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map, |
247 | int length) | |
9e607f2e DB |
248 | { |
249 | char file[PATH_MAX], buff[4096]; | |
250 | struct bpf_elf_map tmp, zero; | |
251 | unsigned int val; | |
252 | FILE *fp; | |
253 | ||
254 | snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); | |
255 | ||
256 | fp = fopen(file, "r"); | |
257 | if (!fp) { | |
258 | fprintf(stderr, "No procfs support?!\n"); | |
259 | return -EIO; | |
260 | } | |
261 | ||
262 | memset(&tmp, 0, sizeof(tmp)); | |
263 | while (fgets(buff, sizeof(buff), fp)) { | |
264 | if (sscanf(buff, "map_type:\t%u", &val) == 1) | |
265 | tmp.type = val; | |
266 | else if (sscanf(buff, "key_size:\t%u", &val) == 1) | |
267 | tmp.size_key = val; | |
268 | else if (sscanf(buff, "value_size:\t%u", &val) == 1) | |
269 | tmp.size_value = val; | |
270 | else if (sscanf(buff, "max_entries:\t%u", &val) == 1) | |
271 | tmp.max_elem = val; | |
4dd3f50a DB |
272 | else if (sscanf(buff, "map_flags:\t%i", &val) == 1) |
273 | tmp.flags = val; | |
9e607f2e DB |
274 | } |
275 | ||
276 | fclose(fp); | |
277 | ||
91d88eeb | 278 | if (!memcmp(&tmp, map, length)) { |
9e607f2e DB |
279 | return 0; |
280 | } else { | |
281 | memset(&zero, 0, sizeof(zero)); | |
282 | /* If kernel doesn't have eBPF-related fdinfo, we cannot do much, | |
283 | * so just accept it. We know we do have an eBPF fd and in this | |
284 | * case, everything is 0. It is guaranteed that no such map exists | |
285 | * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC. | |
286 | */ | |
91d88eeb | 287 | if (!memcmp(&tmp, &zero, length)) |
9e607f2e DB |
288 | return 0; |
289 | ||
afc1a200 | 290 | bpf_map_pin_report(&tmp, map); |
9e607f2e DB |
291 | return -EINVAL; |
292 | } | |
293 | } | |
294 | ||
91d88eeb DB |
/*
 * Mount a bpf file system on @target.
 *
 * First tries to mark @target as a private, recursive mount. If that fails
 * with EINVAL, @target is bind-mounted onto itself once and the attempt is
 * repeated. Finally a "bpf" file system is mounted on @target.
 *
 * Returns 0 on success, -1 (after printing the failing step) otherwise.
 */
static int bpf_mnt_fs(const char *target)
{
	bool bind_done = false;

	while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) {
		/* Only retry once, and only for the EINVAL case. */
		if (errno != EINVAL || bind_done) {
			fprintf(stderr, "mount --make-private %s failed: %s\n",
				target, strerror(errno));
			return -1;
		}

		if (mount(target, target, "none", MS_BIND, NULL)) {
			fprintf(stderr, "mount --bind %s %s failed: %s\n",
				target, target, strerror(errno));
			return -1;
		}

		bind_done = true;
	}

	if (mount("bpf", target, "bpf", 0, NULL)) {
		fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
			target, strerror(errno));
		return -1;
	}

	return 0;
}
323 | ||
32e93fb7 DB |
/*
 * Check whether @mnt lies on a file system whose statfs() type equals
 * @magic.
 *
 * Returns 0 if so, -ENOENT if statfs() fails or the type differs.
 */
static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
{
	struct statfs st_fs;

	if (statfs(mnt, &st_fs) < 0)
		return -ENOENT;
	if ((unsigned long)st_fs.f_type != magic)
		return -ENOENT;

	return 0;
}
335 | ||
336 | static const char *bpf_find_mntpt(const char *fstype, unsigned long magic, | |
337 | char *mnt, int len, | |
338 | const char * const *known_mnts) | |
339 | { | |
340 | const char * const *ptr; | |
341 | char type[100]; | |
342 | FILE *fp; | |
343 | ||
344 | if (known_mnts) { | |
345 | ptr = known_mnts; | |
346 | while (*ptr) { | |
347 | if (bpf_valid_mntpt(*ptr, magic) == 0) { | |
348 | strncpy(mnt, *ptr, len - 1); | |
349 | mnt[len - 1] = 0; | |
350 | return mnt; | |
351 | } | |
352 | ptr++; | |
353 | } | |
354 | } | |
355 | ||
356 | fp = fopen("/proc/mounts", "r"); | |
357 | if (fp == NULL || len != PATH_MAX) | |
358 | return NULL; | |
359 | ||
360 | while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n", | |
361 | mnt, type) == 2) { | |
362 | if (strcmp(type, fstype) == 0) | |
363 | break; | |
364 | } | |
365 | ||
366 | fclose(fp); | |
367 | if (strcmp(type, fstype) != 0) | |
368 | return NULL; | |
369 | ||
370 | return mnt; | |
371 | } | |
372 | ||
373 | int bpf_trace_pipe(void) | |
374 | { | |
375 | char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT; | |
376 | static const char * const tracefs_known_mnts[] = { | |
377 | TRACE_DIR_MNT, | |
378 | "/sys/kernel/debug/tracing", | |
379 | "/tracing", | |
380 | "/trace", | |
381 | 0, | |
382 | }; | |
383 | char tpipe[PATH_MAX]; | |
384 | const char *mnt; | |
385 | int fd; | |
386 | ||
387 | mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt, | |
388 | sizeof(tracefs_mnt), tracefs_known_mnts); | |
389 | if (!mnt) { | |
390 | fprintf(stderr, "tracefs not mounted?\n"); | |
391 | return -1; | |
392 | } | |
393 | ||
394 | snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt); | |
395 | ||
396 | fd = open(tpipe, O_RDONLY); | |
397 | if (fd < 0) | |
398 | return -1; | |
399 | ||
400 | fprintf(stderr, "Running! Hang up with ^C!\n\n"); | |
401 | while (1) { | |
402 | static char buff[4096]; | |
403 | ssize_t ret; | |
404 | ||
405 | ret = read(fd, buff, sizeof(buff) - 1); | |
406 | if (ret > 0) { | |
407 | write(2, buff, ret); | |
408 | fflush(stderr); | |
409 | } | |
410 | } | |
411 | ||
412 | return 0; | |
413 | } | |
414 | ||
91d88eeb DB |
415 | static const char *bpf_get_tc_dir(void) |
416 | { | |
32a121cb | 417 | static bool bpf_mnt_cached; |
91d88eeb DB |
418 | static char bpf_tc_dir[PATH_MAX]; |
419 | static const char *mnt; | |
420 | static const char * const bpf_known_mnts[] = { | |
421 | BPF_DIR_MNT, | |
422 | 0, | |
423 | }; | |
424 | char bpf_mnt[PATH_MAX] = BPF_DIR_MNT; | |
425 | char bpf_glo_dir[PATH_MAX]; | |
426 | int ret; | |
427 | ||
428 | if (bpf_mnt_cached) | |
429 | goto done; | |
430 | ||
431 | mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_mnt, sizeof(bpf_mnt), | |
432 | bpf_known_mnts); | |
433 | if (!mnt) { | |
434 | mnt = getenv(BPF_ENV_MNT); | |
435 | if (!mnt) | |
436 | mnt = BPF_DIR_MNT; | |
437 | ret = bpf_mnt_fs(mnt); | |
438 | if (ret) { | |
439 | mnt = NULL; | |
440 | goto out; | |
441 | } | |
442 | } | |
443 | ||
444 | snprintf(bpf_tc_dir, sizeof(bpf_tc_dir), "%s/%s", mnt, BPF_DIR_TC); | |
445 | ret = mkdir(bpf_tc_dir, S_IRWXU); | |
446 | if (ret && errno != EEXIST) { | |
447 | fprintf(stderr, "mkdir %s failed: %s\n", bpf_tc_dir, | |
448 | strerror(errno)); | |
449 | mnt = NULL; | |
450 | goto out; | |
451 | } | |
452 | ||
453 | snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s", | |
454 | bpf_tc_dir, BPF_DIR_GLOBALS); | |
455 | ret = mkdir(bpf_glo_dir, S_IRWXU); | |
456 | if (ret && errno != EEXIST) { | |
457 | fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir, | |
458 | strerror(errno)); | |
459 | mnt = NULL; | |
460 | goto out; | |
461 | } | |
462 | ||
463 | mnt = bpf_tc_dir; | |
464 | out: | |
465 | bpf_mnt_cached = true; | |
466 | done: | |
467 | return mnt; | |
468 | } | |
469 | ||
470 | static int bpf_obj_get(const char *pathname) | |
471 | { | |
472 | union bpf_attr attr; | |
473 | char tmp[PATH_MAX]; | |
474 | ||
475 | if (strlen(pathname) > 2 && pathname[0] == 'm' && | |
476 | pathname[1] == ':' && bpf_get_tc_dir()) { | |
477 | snprintf(tmp, sizeof(tmp), "%s/%s", | |
478 | bpf_get_tc_dir(), pathname + 2); | |
479 | pathname = tmp; | |
480 | } | |
481 | ||
482 | memset(&attr, 0, sizeof(attr)); | |
483 | attr.pathname = bpf_ptr_to_u64(pathname); | |
484 | ||
485 | return bpf(BPF_OBJ_GET, &attr, sizeof(attr)); | |
486 | } | |
487 | ||
6256f8c9 | 488 | const char *bpf_default_section(const enum bpf_prog_type type) |
11c39b5e DB |
489 | { |
490 | switch (type) { | |
491 | case BPF_PROG_TYPE_SCHED_CLS: | |
492 | return ELF_SECTION_CLASSIFIER; | |
6256f8c9 DB |
493 | case BPF_PROG_TYPE_SCHED_ACT: |
494 | return ELF_SECTION_ACTION; | |
11c39b5e DB |
495 | default: |
496 | return NULL; | |
497 | } | |
498 | } | |
499 | ||
91d88eeb DB |
/*
 * Ways a BPF program can be handed to tc on the command line. The values
 * double as indices into the per-caller opt_tbl[] of permitted modes.
 */
enum bpf_mode {
	CBPF_BYTECODE = 0,	/* cBPF text given inline */
	CBPF_FILE,		/* cBPF text read from a file */
	EBPF_OBJECT,		/* eBPF ELF object file */
	EBPF_PINNED,		/* eBPF object pinned in the bpf fs */
	__BPF_MODE_MAX,
#define BPF_MODE_MAX	__BPF_MODE_MAX
};
508 | ||
509 | static int bpf_parse(int *ptr_argc, char ***ptr_argv, const bool *opt_tbl, | |
510 | enum bpf_prog_type *type, enum bpf_mode *mode, | |
511 | const char **ptr_object, const char **ptr_section, | |
512 | const char **ptr_uds_name, struct sock_filter *opcodes) | |
32e93fb7 | 513 | { |
32e93fb7 | 514 | const char *file, *section, *uds_name; |
32e93fb7 | 515 | bool verbose = false; |
91d88eeb DB |
516 | int ret, argc; |
517 | char **argv; | |
518 | ||
519 | argv = *ptr_argv; | |
520 | argc = *ptr_argc; | |
521 | ||
522 | if (opt_tbl[CBPF_BYTECODE] && | |
523 | (matches(*argv, "bytecode") == 0 || | |
524 | strcmp(*argv, "bc") == 0)) { | |
525 | *mode = CBPF_BYTECODE; | |
526 | } else if (opt_tbl[CBPF_FILE] && | |
527 | (matches(*argv, "bytecode-file") == 0 || | |
528 | strcmp(*argv, "bcf") == 0)) { | |
529 | *mode = CBPF_FILE; | |
530 | } else if (opt_tbl[EBPF_OBJECT] && | |
531 | (matches(*argv, "object-file") == 0 || | |
532 | strcmp(*argv, "obj") == 0)) { | |
533 | *mode = EBPF_OBJECT; | |
534 | } else if (opt_tbl[EBPF_PINNED] && | |
535 | (matches(*argv, "object-pinned") == 0 || | |
536 | matches(*argv, "pinned") == 0 || | |
537 | matches(*argv, "fd") == 0)) { | |
538 | *mode = EBPF_PINNED; | |
32e93fb7 DB |
539 | } else { |
540 | fprintf(stderr, "What mode is \"%s\"?\n", *argv); | |
541 | return -1; | |
542 | } | |
543 | ||
544 | NEXT_ARG(); | |
545 | file = section = uds_name = NULL; | |
91d88eeb | 546 | if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) { |
32e93fb7 DB |
547 | file = *argv; |
548 | NEXT_ARG_FWD(); | |
549 | ||
91d88eeb DB |
550 | if (*type == BPF_PROG_TYPE_UNSPEC) { |
551 | if (argc > 0 && matches(*argv, "type") == 0) { | |
552 | NEXT_ARG(); | |
553 | if (matches(*argv, "cls") == 0) { | |
554 | *type = BPF_PROG_TYPE_SCHED_CLS; | |
555 | } else if (matches(*argv, "act") == 0) { | |
556 | *type = BPF_PROG_TYPE_SCHED_ACT; | |
557 | } else { | |
558 | fprintf(stderr, "What type is \"%s\"?\n", | |
559 | *argv); | |
560 | return -1; | |
561 | } | |
562 | NEXT_ARG_FWD(); | |
563 | } else { | |
564 | *type = BPF_PROG_TYPE_SCHED_CLS; | |
565 | } | |
566 | } | |
567 | ||
568 | section = bpf_default_section(*type); | |
32e93fb7 DB |
569 | if (argc > 0 && matches(*argv, "section") == 0) { |
570 | NEXT_ARG(); | |
571 | section = *argv; | |
572 | NEXT_ARG_FWD(); | |
573 | } | |
574 | ||
575 | uds_name = getenv(BPF_ENV_UDS); | |
576 | if (argc > 0 && !uds_name && | |
577 | matches(*argv, "export") == 0) { | |
578 | NEXT_ARG(); | |
579 | uds_name = *argv; | |
580 | NEXT_ARG_FWD(); | |
581 | } | |
582 | ||
583 | if (argc > 0 && matches(*argv, "verbose") == 0) { | |
584 | verbose = true; | |
585 | NEXT_ARG_FWD(); | |
586 | } | |
587 | ||
588 | PREV_ARG(); | |
589 | } | |
590 | ||
91d88eeb DB |
591 | if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE) |
592 | ret = bpf_ops_parse(argc, argv, opcodes, *mode == CBPF_FILE); | |
593 | else if (*mode == EBPF_OBJECT) | |
594 | ret = bpf_obj_open(file, *type, section, verbose); | |
595 | else if (*mode == EBPF_PINNED) | |
32e93fb7 | 596 | ret = bpf_obj_get(file); |
91d88eeb | 597 | else |
32e93fb7 DB |
598 | return -1; |
599 | ||
91d88eeb DB |
600 | if (ptr_object) |
601 | *ptr_object = file; | |
602 | if (ptr_section) | |
603 | *ptr_section = section; | |
604 | if (ptr_uds_name) | |
605 | *ptr_uds_name = uds_name; | |
606 | ||
607 | *ptr_argc = argc; | |
608 | *ptr_argv = argv; | |
609 | ||
610 | return ret; | |
611 | } | |
612 | ||
613 | int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl, | |
614 | enum bpf_prog_type type, const char **ptr_object, | |
615 | const char **ptr_uds_name, struct nlmsghdr *n) | |
616 | { | |
617 | struct sock_filter opcodes[BPF_MAXINSNS]; | |
618 | const bool opt_tbl[BPF_MODE_MAX] = { | |
619 | [CBPF_BYTECODE] = true, | |
620 | [CBPF_FILE] = true, | |
621 | [EBPF_OBJECT] = true, | |
622 | [EBPF_PINNED] = true, | |
623 | }; | |
624 | char annotation[256]; | |
625 | const char *section; | |
626 | enum bpf_mode mode; | |
627 | int ret; | |
628 | ||
629 | ret = bpf_parse(ptr_argc, ptr_argv, opt_tbl, &type, &mode, | |
630 | ptr_object, §ion, ptr_uds_name, opcodes); | |
631 | if (ret < 0) | |
632 | return ret; | |
633 | ||
32e93fb7 DB |
634 | if (mode == CBPF_BYTECODE || mode == CBPF_FILE) { |
635 | addattr16(n, MAX_MSG, nla_tbl[BPF_NLA_OPS_LEN], ret); | |
636 | addattr_l(n, MAX_MSG, nla_tbl[BPF_NLA_OPS], opcodes, | |
637 | ret * sizeof(struct sock_filter)); | |
91d88eeb DB |
638 | } |
639 | ||
640 | if (mode == EBPF_OBJECT || mode == EBPF_PINNED) { | |
32e93fb7 | 641 | snprintf(annotation, sizeof(annotation), "%s:[%s]", |
91d88eeb DB |
642 | basename(*ptr_object), mode == EBPF_PINNED ? |
643 | "*fsobj" : section); | |
32e93fb7 DB |
644 | |
645 | addattr32(n, MAX_MSG, nla_tbl[BPF_NLA_FD], ret); | |
646 | addattrstrz(n, MAX_MSG, nla_tbl[BPF_NLA_NAME], annotation); | |
647 | } | |
648 | ||
91d88eeb DB |
649 | return 0; |
650 | } | |
32e93fb7 | 651 | |
91d88eeb DB |
652 | int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv) |
653 | { | |
654 | enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC; | |
655 | const bool opt_tbl[BPF_MODE_MAX] = { | |
656 | [CBPF_BYTECODE] = false, | |
657 | [CBPF_FILE] = false, | |
658 | [EBPF_OBJECT] = true, | |
659 | [EBPF_PINNED] = true, | |
660 | }; | |
661 | const struct bpf_elf_map test = { | |
662 | .type = BPF_MAP_TYPE_PROG_ARRAY, | |
663 | .size_key = sizeof(int), | |
664 | .size_value = sizeof(int), | |
665 | }; | |
666 | int ret, prog_fd, map_fd; | |
667 | const char *section; | |
668 | enum bpf_mode mode; | |
669 | uint32_t map_key; | |
670 | ||
671 | prog_fd = bpf_parse(&argc, &argv, opt_tbl, &type, &mode, | |
672 | NULL, §ion, NULL, NULL); | |
673 | if (prog_fd < 0) | |
674 | return prog_fd; | |
675 | if (key) { | |
676 | map_key = *key; | |
677 | } else { | |
678 | ret = sscanf(section, "%*i/%i", &map_key); | |
679 | if (ret != 1) { | |
32a121cb | 680 | fprintf(stderr, "Couldn\'t infer map key from section name! Please provide \'key\' argument!\n"); |
91d88eeb DB |
681 | ret = -EINVAL; |
682 | goto out_prog; | |
683 | } | |
684 | } | |
32e93fb7 | 685 | |
91d88eeb DB |
686 | map_fd = bpf_obj_get(map_path); |
687 | if (map_fd < 0) { | |
688 | fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n", | |
689 | map_path, strerror(errno)); | |
690 | ret = map_fd; | |
691 | goto out_prog; | |
692 | } | |
693 | ||
694 | ret = bpf_map_selfcheck_pinned(map_fd, &test, | |
695 | offsetof(struct bpf_elf_map, max_elem)); | |
696 | if (ret < 0) { | |
697 | fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path); | |
698 | goto out_map; | |
699 | } | |
700 | ||
701 | ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY); | |
702 | if (ret < 0) | |
703 | fprintf(stderr, "Map update failed: %s\n", strerror(errno)); | |
704 | out_map: | |
705 | close(map_fd); | |
706 | out_prog: | |
707 | close(prog_fd); | |
708 | return ret; | |
32e93fb7 DB |
709 | } |
710 | ||
6256f8c9 | 711 | #ifdef HAVE_ELF |
32e93fb7 DB |
/* Description of one eBPF program: its type, instructions and license. */
struct bpf_elf_prog {
	enum bpf_prog_type	type;
	const struct bpf_insn	*insns;
	size_t			size;
	const char		*license;
};
718 | ||
f6793eec DB |
/*
 * Node of a singly linked hash chain that associates a custom pinning id
 * with the sub-path to use for it.
 */
struct bpf_hash_entry {
	unsigned int		pinning;
	const char		*subpath;
	struct bpf_hash_entry	*next;
};
724 | ||
32e93fb7 DB |
725 | struct bpf_elf_ctx { |
726 | Elf *elf_fd; | |
727 | GElf_Ehdr elf_hdr; | |
728 | Elf_Data *sym_tab; | |
729 | Elf_Data *str_tab; | |
730 | int obj_fd; | |
731 | int map_fds[ELF_MAX_MAPS]; | |
732 | struct bpf_elf_map maps[ELF_MAX_MAPS]; | |
733 | int sym_num; | |
734 | int map_num; | |
735 | bool *sec_done; | |
736 | int sec_maps; | |
737 | char license[ELF_MAX_LICENSE_LEN]; | |
738 | enum bpf_prog_type type; | |
739 | bool verbose; | |
740 | struct bpf_elf_st stat; | |
f6793eec | 741 | struct bpf_hash_entry *ht[256]; |
f31645d1 DB |
742 | char *log; |
743 | size_t log_size; | |
32e93fb7 DB |
744 | }; |
745 | ||
6256f8c9 | 746 | struct bpf_elf_sec_data { |
32e93fb7 DB |
747 | GElf_Shdr sec_hdr; |
748 | Elf_Data *sec_data; | |
749 | const char *sec_name; | |
6256f8c9 DB |
750 | }; |
751 | ||
/* Bundle of map-related pointers: fd array, object name, stat and entries. */
struct bpf_map_data {
	int			*fds;
	const char		*obj;
	struct bpf_elf_st	*st;
	struct bpf_elf_map	*ent;
};
758 | ||
f31645d1 DB |
759 | static __check_format_string(2, 3) void |
760 | bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...) | |
11c39b5e DB |
761 | { |
762 | va_list vl; | |
763 | ||
764 | va_start(vl, format); | |
765 | vfprintf(stderr, format, vl); | |
766 | va_end(vl); | |
767 | ||
f31645d1 | 768 | if (ctx->log && ctx->log[0]) { |
afc1a200 DB |
769 | if (ctx->verbose) { |
770 | fprintf(stderr, "%s\n", ctx->log); | |
771 | } else { | |
772 | unsigned int off = 0, len = strlen(ctx->log); | |
773 | ||
774 | if (len > BPF_MAX_LOG) { | |
775 | off = len - BPF_MAX_LOG; | |
776 | fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n", | |
777 | off); | |
778 | } | |
779 | fprintf(stderr, "%s\n", ctx->log + off); | |
780 | } | |
781 | ||
f31645d1 DB |
782 | memset(ctx->log, 0, ctx->log_size); |
783 | } | |
784 | } | |
785 | ||
786 | static int bpf_log_realloc(struct bpf_elf_ctx *ctx) | |
787 | { | |
788 | size_t log_size = ctx->log_size; | |
789 | void *ptr; | |
790 | ||
791 | if (!ctx->log) { | |
792 | log_size = 65536; | |
793 | } else { | |
794 | log_size <<= 1; | |
795 | if (log_size > (UINT_MAX >> 8)) | |
796 | return -EINVAL; | |
d937a74b | 797 | } |
f31645d1 DB |
798 | |
799 | ptr = realloc(ctx->log, log_size); | |
800 | if (!ptr) | |
801 | return -ENOMEM; | |
802 | ||
803 | ctx->log = ptr; | |
804 | ctx->log_size = log_size; | |
805 | ||
806 | return 0; | |
11c39b5e DB |
807 | } |
808 | ||
4dd3f50a DB |
/*
 * Create a BPF map via BPF_MAP_CREATE.
 *
 * @type:       map type
 * @size_key:   key size
 * @size_value: value size
 * @max_elem:   maximum number of entries
 * @flags:      map flags
 *
 * Returns the result of the bpf() wrapper.
 */
static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
			  uint32_t size_value, uint32_t max_elem,
			  uint32_t flags)
{
	union bpf_attr attr;

	/* Clear the whole union so every field not set below is zero. */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = type;
	attr.key_size = size_key;
	attr.value_size = size_value;
	attr.max_entries = max_elem;
	attr.map_flags = flags;

	return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
824 | ||
/*
 * Load a BPF program via BPF_PROG_LOAD.
 *
 * @type:       program type
 * @insns:      instruction array
 * @size_insns: size of @insns in bytes (converted to an instruction count)
 * @license:    license string
 * @log:        buffer handed to the kernel as log_buf
 * @size_log:   size of @log; when 0, no log buffer is passed at all
 *
 * Returns the result of the bpf() wrapper.
 */
static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
			 size_t size_insns, const char *license, char *log,
			 size_t size_log)
{
	union bpf_attr attr;

	/* Clear the whole union so every field not set below is zero. */
	memset(&attr, 0, sizeof(attr));
	attr.prog_type = type;
	attr.insns = bpf_ptr_to_u64(insns);
	attr.insn_cnt = size_insns / sizeof(struct bpf_insn);
	attr.license = bpf_ptr_to_u64(license);

	if (size_log > 0) {
		attr.log_buf = bpf_ptr_to_u64(log);
		attr.log_size = size_log;
		attr.log_level = 1;
	}

	return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
845 | ||
/*
 * Pin the BPF object behind @fd at @pathname via BPF_OBJ_PIN.
 *
 * Returns the result of the bpf() wrapper.
 */
static int bpf_obj_pin(int fd, const char *pathname)
{
	union bpf_attr attr;

	/* Clear the whole union so every field not set below is zero. */
	memset(&attr, 0, sizeof(attr));
	attr.pathname = bpf_ptr_to_u64(pathname);
	attr.bpf_fd = fd;

	return bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
}
11c39b5e | 856 | |
32e93fb7 DB |
/*
 * Compute the SHA-1 digest of file @object using an AF_ALG "hash" socket.
 *
 * @object: path of the file to hash
 * @out:    receives the digest
 * @len:    size of @out; must be exactly 20
 *
 * The file is fed to the socket with sendfile() and the digest is read
 * back from it.
 *
 * Returns 0 on success, -EINVAL for bad arguments, otherwise a negative
 * value after printing which step failed. All descriptors opened here are
 * closed before returning.
 */
static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
{
	struct sockaddr_alg alg = {
		.salg_family	= AF_ALG,
		.salg_type	= "hash",
		.salg_name	= "sha1",
	};
	int ret, cfd, ofd, ffd;
	struct stat stbuff;
	ssize_t size;

	if (!object || len != 20)
		return -EINVAL;

	cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (cfd < 0) {
		fprintf(stderr, "Cannot get AF_ALG socket: %s\n",
			strerror(errno));
		return cfd;
	}

	ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
	if (ret < 0) {
		fprintf(stderr, "Error binding socket: %s\n", strerror(errno));
		goto out_cfd;
	}

	ofd = accept(cfd, NULL, 0);
	if (ofd < 0) {
		fprintf(stderr, "Error accepting socket: %s\n",
			strerror(errno));
		ret = ofd;
		goto out_cfd;
	}

	ffd = open(object, O_RDONLY);
	if (ffd < 0) {
		fprintf(stderr, "Error opening object %s: %s\n",
			object, strerror(errno));
		ret = ffd;
		goto out_ofd;
	}

	ret = fstat(ffd, &stbuff);
	if (ret < 0) {
		fprintf(stderr, "Error doing fstat: %s\n",
			strerror(errno));
		goto out_ffd;
	}

	size = sendfile(ofd, ffd, NULL, stbuff.st_size);
	if (size != stbuff.st_size) {
		/* st_size is off_t; convert it to match the %zu conversion. */
		fprintf(stderr, "Error from sendfile (%zd vs %zu bytes): %s\n",
			size, (size_t)stbuff.st_size, strerror(errno));
		ret = -1;
		goto out_ffd;
	}

	size = read(ofd, out, len);
	if (size < 0 || (size_t)size != len) {
		fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
			size, len, strerror(errno));
		ret = -1;
	} else {
		ret = 0;
	}
out_ffd:
	close(ffd);
out_ofd:
	close(ofd);
out_cfd:
	close(cfd);
	return ret;
}
931 | ||
/*
 * Return a hex string identifying the object file, derived from the hash
 * produced by bpf_obj_hash().
 *
 * The string is computed from @pathname on the first successful call and
 * cached in static storage; later calls ignore @pathname and return the
 * cached value. Returns NULL (after printing a message) if hashing fails
 * before anything has been cached.
 */
static const char *bpf_get_obj_uid(const char *pathname)
{
	static bool bpf_uid_cached;
	static char bpf_uid[64];
	uint8_t tmp[20];
	int ret;

	if (bpf_uid_cached)
		goto done;

	ret = bpf_obj_hash(pathname, tmp, sizeof(tmp));
	if (ret) {
		fprintf(stderr, "Object hashing failed!\n");
		return NULL;
	}

	hexstring_n2a(tmp, sizeof(tmp), bpf_uid, sizeof(bpf_uid));
	bpf_uid_cached = true;
done:
	return bpf_uid;
}
953 | ||
32e93fb7 DB |
/*
 * Prepare the process for loading the object at @pathname: lift the
 * locked-memory limit (best effort), make sure the tc bpf directory is
 * set up and compute the object's uid.
 *
 * Returns 0 on success, and also when no bpf file system is available
 * (a notice is printed and loading continues without pinning support);
 * -1 if the object uid cannot be computed.
 */
static int bpf_init_env(const char *pathname)
{
	struct rlimit limit = {
		.rlim_cur = RLIM_INFINITY,
		.rlim_max = RLIM_INFINITY,
	};

	/* Don't bother in case we fail! */
	setrlimit(RLIMIT_MEMLOCK, &limit);

	if (!bpf_get_tc_dir()) {
		fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n");
		return 0;
	}

	if (!bpf_get_obj_uid(pathname))
		return -1;

	return 0;
}
974 | ||
f6793eec DB |
975 | static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx, |
976 | uint32_t pinning) | |
977 | { | |
978 | struct bpf_hash_entry *entry; | |
979 | ||
980 | entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)]; | |
981 | while (entry && entry->pinning != pinning) | |
982 | entry = entry->next; | |
983 | ||
984 | return entry ? entry->subpath : NULL; | |
985 | } | |
986 | ||
987 | static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx, | |
988 | uint32_t pinning) | |
11c39b5e | 989 | { |
32e93fb7 DB |
990 | switch (pinning) { |
991 | case PIN_OBJECT_NS: | |
992 | case PIN_GLOBAL_NS: | |
993 | return false; | |
994 | case PIN_NONE: | |
32e93fb7 | 995 | return true; |
f6793eec DB |
996 | default: |
997 | return !bpf_custom_pinning(ctx, pinning); | |
32e93fb7 DB |
998 | } |
999 | } | |
1000 | ||
1001 | static void bpf_make_pathname(char *pathname, size_t len, const char *name, | |
f6793eec | 1002 | const struct bpf_elf_ctx *ctx, uint32_t pinning) |
32e93fb7 DB |
1003 | { |
1004 | switch (pinning) { | |
1005 | case PIN_OBJECT_NS: | |
1006 | snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(), | |
1007 | bpf_get_obj_uid(NULL), name); | |
1008 | break; | |
1009 | case PIN_GLOBAL_NS: | |
1010 | snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(), | |
1011 | BPF_DIR_GLOBALS, name); | |
1012 | break; | |
f6793eec DB |
1013 | default: |
1014 | snprintf(pathname, len, "%s/../%s/%s", bpf_get_tc_dir(), | |
1015 | bpf_custom_pinning(ctx, pinning), name); | |
1016 | break; | |
32e93fb7 DB |
1017 | } |
1018 | } | |
1019 | ||
f6793eec DB |
1020 | static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx, |
1021 | uint32_t pinning) | |
32e93fb7 DB |
1022 | { |
1023 | char pathname[PATH_MAX]; | |
1024 | ||
f6793eec | 1025 | if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir()) |
32e93fb7 DB |
1026 | return 0; |
1027 | ||
f6793eec | 1028 | bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning); |
32e93fb7 DB |
1029 | return bpf_obj_get(pathname); |
1030 | } | |
1031 | ||
f6793eec | 1032 | static int bpf_make_obj_path(void) |
32e93fb7 | 1033 | { |
f6793eec | 1034 | char tmp[PATH_MAX]; |
32e93fb7 DB |
1035 | int ret; |
1036 | ||
f6793eec DB |
1037 | snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_tc_dir(), |
1038 | bpf_get_obj_uid(NULL)); | |
1039 | ||
1040 | ret = mkdir(tmp, S_IRWXU); | |
1041 | if (ret && errno != EEXIST) { | |
1042 | fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno)); | |
1043 | return ret; | |
1044 | } | |
1045 | ||
1046 | return 0; | |
1047 | } | |
1048 | ||
1049 | static int bpf_make_custom_path(const char *todo) | |
1050 | { | |
1051 | char tmp[PATH_MAX], rem[PATH_MAX], *sub; | |
1052 | int ret; | |
1053 | ||
1054 | snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_tc_dir()); | |
1055 | snprintf(rem, sizeof(rem), "%s/", todo); | |
1056 | sub = strtok(rem, "/"); | |
32e93fb7 | 1057 | |
f6793eec DB |
1058 | while (sub) { |
1059 | if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX) | |
1060 | return -EINVAL; | |
1061 | ||
1062 | strcat(tmp, sub); | |
1063 | strcat(tmp, "/"); | |
32e93fb7 | 1064 | |
f6793eec | 1065 | ret = mkdir(tmp, S_IRWXU); |
32e93fb7 | 1066 | if (ret && errno != EEXIST) { |
f6793eec | 1067 | fprintf(stderr, "mkdir %s failed: %s\n", tmp, |
32e93fb7 DB |
1068 | strerror(errno)); |
1069 | return ret; | |
1070 | } | |
f6793eec DB |
1071 | |
1072 | sub = strtok(NULL, "/"); | |
32e93fb7 DB |
1073 | } |
1074 | ||
f6793eec DB |
1075 | return 0; |
1076 | } | |
1077 | ||
1078 | static int bpf_place_pinned(int fd, const char *name, | |
1079 | const struct bpf_elf_ctx *ctx, uint32_t pinning) | |
1080 | { | |
1081 | char pathname[PATH_MAX]; | |
1082 | const char *tmp; | |
1083 | int ret = 0; | |
1084 | ||
1085 | if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir()) | |
1086 | return 0; | |
1087 | ||
1088 | if (pinning == PIN_OBJECT_NS) | |
1089 | ret = bpf_make_obj_path(); | |
1090 | else if ((tmp = bpf_custom_pinning(ctx, pinning))) | |
1091 | ret = bpf_make_custom_path(tmp); | |
1092 | if (ret < 0) | |
1093 | return ret; | |
1094 | ||
1095 | bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning); | |
32e93fb7 DB |
1096 | return bpf_obj_pin(fd, pathname); |
1097 | } | |
1098 | ||
f31645d1 DB |
1099 | static void bpf_prog_report(int fd, const char *section, |
1100 | const struct bpf_elf_prog *prog, | |
1101 | struct bpf_elf_ctx *ctx) | |
32e93fb7 | 1102 | { |
afc1a200 DB |
1103 | unsigned int insns = prog->size / sizeof(struct bpf_insn); |
1104 | ||
1105 | fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section, | |
f31645d1 DB |
1106 | fd < 0 ? "rejected: " : "loaded", |
1107 | fd < 0 ? strerror(errno) : "", | |
1108 | fd < 0 ? errno : fd); | |
1109 | ||
1110 | fprintf(stderr, " - Type: %u\n", prog->type); | |
afc1a200 DB |
1111 | fprintf(stderr, " - Instructions: %u (%u over limit)\n", |
1112 | insns, insns > BPF_MAXINSNS ? insns - BPF_MAXINSNS : 0); | |
f31645d1 DB |
1113 | fprintf(stderr, " - License: %s\n\n", prog->license); |
1114 | ||
1115 | bpf_dump_error(ctx, "Verifier analysis:\n\n"); | |
1116 | } | |
32e93fb7 | 1117 | |
f31645d1 DB |
1118 | static int bpf_prog_attach(const char *section, |
1119 | const struct bpf_elf_prog *prog, | |
1120 | struct bpf_elf_ctx *ctx) | |
1121 | { | |
1122 | int tries = 0, fd; | |
1123 | retry: | |
32e93fb7 DB |
1124 | errno = 0; |
1125 | fd = bpf_prog_load(prog->type, prog->insns, prog->size, | |
f31645d1 DB |
1126 | prog->license, ctx->log, ctx->log_size); |
1127 | if (fd < 0 || ctx->verbose) { | |
1128 | /* The verifier log is pretty chatty, sometimes so chatty | |
1129 | * on larger programs, that we could fail to dump everything | |
1130 | * into our buffer. Still, try to give a debuggable error | |
1131 | * log for the user, so enlarge it and re-fail. | |
1132 | */ | |
1133 | if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) { | |
1134 | if (tries++ < 6 && !bpf_log_realloc(ctx)) | |
1135 | goto retry; | |
1136 | ||
32a121cb | 1137 | fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n", |
f31645d1 DB |
1138 | ctx->log_size, tries); |
1139 | return fd; | |
1140 | } | |
1141 | ||
1142 | bpf_prog_report(fd, section, prog, ctx); | |
32e93fb7 DB |
1143 | } |
1144 | ||
1145 | return fd; | |
1146 | } | |
1147 | ||
f31645d1 DB |
1148 | static void bpf_map_report(int fd, const char *name, |
1149 | const struct bpf_elf_map *map, | |
1150 | struct bpf_elf_ctx *ctx) | |
1151 | { | |
1152 | fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name, | |
1153 | fd < 0 ? "rejected: " : "loaded", | |
1154 | fd < 0 ? strerror(errno) : "", | |
1155 | fd < 0 ? errno : fd); | |
1156 | ||
1157 | fprintf(stderr, " - Type: %u\n", map->type); | |
1158 | fprintf(stderr, " - Identifier: %u\n", map->id); | |
1159 | fprintf(stderr, " - Pinning: %u\n", map->pinning); | |
1160 | fprintf(stderr, " - Size key: %u\n", map->size_key); | |
1161 | fprintf(stderr, " - Size value: %u\n", map->size_value); | |
4dd3f50a DB |
1162 | fprintf(stderr, " - Max elems: %u\n", map->max_elem); |
1163 | fprintf(stderr, " - Flags: %#x\n\n", map->flags); | |
f31645d1 DB |
1164 | } |
1165 | ||
32e93fb7 | 1166 | static int bpf_map_attach(const char *name, const struct bpf_elf_map *map, |
f31645d1 | 1167 | struct bpf_elf_ctx *ctx) |
32e93fb7 DB |
1168 | { |
1169 | int fd, ret; | |
1170 | ||
f6793eec | 1171 | fd = bpf_probe_pinned(name, ctx, map->pinning); |
32e93fb7 | 1172 | if (fd > 0) { |
91d88eeb DB |
1173 | ret = bpf_map_selfcheck_pinned(fd, map, |
1174 | offsetof(struct bpf_elf_map, | |
1175 | id)); | |
9e607f2e DB |
1176 | if (ret < 0) { |
1177 | close(fd); | |
1178 | fprintf(stderr, "Map \'%s\' self-check failed!\n", | |
1179 | name); | |
1180 | return ret; | |
1181 | } | |
f31645d1 | 1182 | if (ctx->verbose) |
32e93fb7 DB |
1183 | fprintf(stderr, "Map \'%s\' loaded as pinned!\n", |
1184 | name); | |
1185 | return fd; | |
1186 | } | |
1187 | ||
1188 | errno = 0; | |
1189 | fd = bpf_map_create(map->type, map->size_key, map->size_value, | |
4dd3f50a | 1190 | map->max_elem, map->flags); |
f31645d1 DB |
1191 | if (fd < 0 || ctx->verbose) { |
1192 | bpf_map_report(fd, name, map, ctx); | |
32e93fb7 DB |
1193 | if (fd < 0) |
1194 | return fd; | |
1195 | } | |
1196 | ||
f6793eec | 1197 | ret = bpf_place_pinned(fd, name, ctx, map->pinning); |
32e93fb7 DB |
1198 | if (ret < 0 && errno != EEXIST) { |
1199 | fprintf(stderr, "Could not pin %s map: %s\n", name, | |
1200 | strerror(errno)); | |
1201 | close(fd); | |
1202 | return ret; | |
1203 | } | |
1204 | ||
1205 | return fd; | |
1206 | } | |
1207 | ||
32e93fb7 DB |
1208 | static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx, |
1209 | const GElf_Sym *sym) | |
1210 | { | |
1211 | return ctx->str_tab->d_buf + sym->st_name; | |
1212 | } | |
1213 | ||
1214 | static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which) | |
1215 | { | |
1216 | GElf_Sym sym; | |
11c39b5e DB |
1217 | int i; |
1218 | ||
32e93fb7 DB |
1219 | for (i = 0; i < ctx->sym_num; i++) { |
1220 | if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) | |
1221 | continue; | |
1222 | ||
5230a2ed DB |
1223 | if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || |
1224 | GELF_ST_TYPE(sym.st_info) != STT_NOTYPE || | |
32e93fb7 DB |
1225 | sym.st_shndx != ctx->sec_maps || |
1226 | sym.st_value / sizeof(struct bpf_elf_map) != which) | |
1227 | continue; | |
1228 | ||
1229 | return bpf_str_tab_name(ctx, &sym); | |
11c39b5e | 1230 | } |
32e93fb7 DB |
1231 | |
1232 | return NULL; | |
11c39b5e DB |
1233 | } |
1234 | ||
32e93fb7 | 1235 | static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx) |
11c39b5e | 1236 | { |
32e93fb7 DB |
1237 | const char *map_name; |
1238 | int i, fd; | |
11c39b5e | 1239 | |
32e93fb7 DB |
1240 | for (i = 0; i < ctx->map_num; i++) { |
1241 | map_name = bpf_map_fetch_name(ctx, i); | |
1242 | if (!map_name) | |
1243 | return -EIO; | |
11c39b5e | 1244 | |
f31645d1 | 1245 | fd = bpf_map_attach(map_name, &ctx->maps[i], ctx); |
32e93fb7 DB |
1246 | if (fd < 0) |
1247 | return fd; | |
11c39b5e | 1248 | |
32e93fb7 | 1249 | ctx->map_fds[i] = fd; |
11c39b5e DB |
1250 | } |
1251 | ||
1252 | return 0; | |
11c39b5e DB |
1253 | } |
1254 | ||
32e93fb7 DB |
1255 | static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section, |
1256 | struct bpf_elf_sec_data *data) | |
11c39b5e | 1257 | { |
32e93fb7 | 1258 | Elf_Data *sec_edata; |
11c39b5e DB |
1259 | GElf_Shdr sec_hdr; |
1260 | Elf_Scn *sec_fd; | |
11c39b5e DB |
1261 | char *sec_name; |
1262 | ||
32e93fb7 | 1263 | memset(data, 0, sizeof(*data)); |
11c39b5e | 1264 | |
32e93fb7 | 1265 | sec_fd = elf_getscn(ctx->elf_fd, section); |
11c39b5e DB |
1266 | if (!sec_fd) |
1267 | return -EINVAL; | |
11c39b5e DB |
1268 | if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr) |
1269 | return -EIO; | |
1270 | ||
32e93fb7 | 1271 | sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx, |
11c39b5e DB |
1272 | sec_hdr.sh_name); |
1273 | if (!sec_name || !sec_hdr.sh_size) | |
1274 | return -ENOENT; | |
1275 | ||
1276 | sec_edata = elf_getdata(sec_fd, NULL); | |
1277 | if (!sec_edata || elf_getdata(sec_fd, sec_edata)) | |
1278 | return -EIO; | |
1279 | ||
32e93fb7 | 1280 | memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr)); |
11c39b5e | 1281 | |
32e93fb7 DB |
1282 | data->sec_name = sec_name; |
1283 | data->sec_data = sec_edata; | |
11c39b5e DB |
1284 | return 0; |
1285 | } | |
1286 | ||
32e93fb7 DB |
1287 | static int bpf_fetch_maps(struct bpf_elf_ctx *ctx, int section, |
1288 | struct bpf_elf_sec_data *data) | |
11c39b5e | 1289 | { |
32e93fb7 DB |
1290 | if (data->sec_data->d_size % sizeof(struct bpf_elf_map) != 0) |
1291 | return -EINVAL; | |
11c39b5e | 1292 | |
32e93fb7 DB |
1293 | ctx->map_num = data->sec_data->d_size / sizeof(struct bpf_elf_map); |
1294 | ctx->sec_maps = section; | |
1295 | ctx->sec_done[section] = true; | |
11c39b5e | 1296 | |
32e93fb7 DB |
1297 | if (ctx->map_num > ARRAY_SIZE(ctx->map_fds)) { |
1298 | fprintf(stderr, "Too many BPF maps in ELF section!\n"); | |
1299 | return -ENOMEM; | |
1300 | } | |
11c39b5e | 1301 | |
32e93fb7 DB |
1302 | memcpy(ctx->maps, data->sec_data->d_buf, data->sec_data->d_size); |
1303 | return 0; | |
1304 | } | |
11c39b5e | 1305 | |
32e93fb7 DB |
1306 | static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section, |
1307 | struct bpf_elf_sec_data *data) | |
1308 | { | |
1309 | if (data->sec_data->d_size > sizeof(ctx->license)) | |
1310 | return -ENOMEM; | |
11c39b5e | 1311 | |
32e93fb7 DB |
1312 | memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size); |
1313 | ctx->sec_done[section] = true; | |
1314 | return 0; | |
1315 | } | |
11c39b5e | 1316 | |
32e93fb7 DB |
1317 | static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section, |
1318 | struct bpf_elf_sec_data *data) | |
1319 | { | |
1320 | ctx->sym_tab = data->sec_data; | |
1321 | ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize; | |
1322 | ctx->sec_done[section] = true; | |
11c39b5e DB |
1323 | return 0; |
1324 | } | |
1325 | ||
32e93fb7 DB |
1326 | static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section, |
1327 | struct bpf_elf_sec_data *data) | |
11c39b5e | 1328 | { |
32e93fb7 DB |
1329 | ctx->str_tab = data->sec_data; |
1330 | ctx->sec_done[section] = true; | |
1331 | return 0; | |
1332 | } | |
11c39b5e | 1333 | |
afc1a200 DB |
1334 | static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx) |
1335 | { | |
1336 | return ctx->sym_tab && ctx->str_tab && ctx->sec_maps; | |
1337 | } | |
1338 | ||
32e93fb7 DB |
1339 | static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx) |
1340 | { | |
1341 | struct bpf_elf_sec_data data; | |
1342 | int i, ret = -1; | |
11c39b5e | 1343 | |
32e93fb7 DB |
1344 | for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { |
1345 | ret = bpf_fill_section_data(ctx, i, &data); | |
11c39b5e DB |
1346 | if (ret < 0) |
1347 | continue; | |
1348 | ||
cce3d466 DB |
1349 | if (data.sec_hdr.sh_type == SHT_PROGBITS && |
1350 | !strcmp(data.sec_name, ELF_SECTION_MAPS)) | |
32e93fb7 | 1351 | ret = bpf_fetch_maps(ctx, i, &data); |
cce3d466 DB |
1352 | else if (data.sec_hdr.sh_type == SHT_PROGBITS && |
1353 | !strcmp(data.sec_name, ELF_SECTION_LICENSE)) | |
32e93fb7 | 1354 | ret = bpf_fetch_license(ctx, i, &data); |
cce3d466 DB |
1355 | else if (data.sec_hdr.sh_type == SHT_SYMTAB && |
1356 | !strcmp(data.sec_name, ".symtab")) | |
32e93fb7 DB |
1357 | ret = bpf_fetch_symtab(ctx, i, &data); |
1358 | else if (data.sec_hdr.sh_type == SHT_STRTAB && | |
cce3d466 | 1359 | !strcmp(data.sec_name, ".strtab")) |
32e93fb7 DB |
1360 | ret = bpf_fetch_strtab(ctx, i, &data); |
1361 | if (ret < 0) { | |
afc1a200 | 1362 | fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n", |
32a121cb | 1363 | i); |
32e93fb7 | 1364 | break; |
11c39b5e | 1365 | } |
32e93fb7 DB |
1366 | } |
1367 | ||
afc1a200 | 1368 | if (bpf_has_map_data(ctx)) { |
32e93fb7 DB |
1369 | ret = bpf_maps_attach_all(ctx); |
1370 | if (ret < 0) { | |
1371 | fprintf(stderr, "Error loading maps into kernel!\n"); | |
1372 | return ret; | |
11c39b5e DB |
1373 | } |
1374 | } | |
1375 | ||
1376 | return ret; | |
1377 | } | |
1378 | ||
32e93fb7 | 1379 | static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section) |
11c39b5e | 1380 | { |
32e93fb7 DB |
1381 | struct bpf_elf_sec_data data; |
1382 | struct bpf_elf_prog prog; | |
1383 | int ret, i, fd = -1; | |
11c39b5e | 1384 | |
32e93fb7 DB |
1385 | for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { |
1386 | if (ctx->sec_done[i]) | |
11c39b5e DB |
1387 | continue; |
1388 | ||
32e93fb7 | 1389 | ret = bpf_fill_section_data(ctx, i, &data); |
cce3d466 DB |
1390 | if (ret < 0 || |
1391 | !(data.sec_hdr.sh_type == SHT_PROGBITS && | |
1392 | data.sec_hdr.sh_flags & SHF_EXECINSTR && | |
1393 | !strcmp(data.sec_name, section))) | |
11c39b5e DB |
1394 | continue; |
1395 | ||
32e93fb7 DB |
1396 | memset(&prog, 0, sizeof(prog)); |
1397 | prog.type = ctx->type; | |
1398 | prog.insns = data.sec_data->d_buf; | |
1399 | prog.size = data.sec_data->d_size; | |
1400 | prog.license = ctx->license; | |
11c39b5e | 1401 | |
f31645d1 | 1402 | fd = bpf_prog_attach(section, &prog, ctx); |
32e93fb7 | 1403 | if (fd < 0) |
afc1a200 | 1404 | break; |
11c39b5e | 1405 | |
32e93fb7 | 1406 | ctx->sec_done[i] = true; |
11c39b5e DB |
1407 | break; |
1408 | } | |
1409 | ||
32e93fb7 | 1410 | return fd; |
11c39b5e DB |
1411 | } |
1412 | ||
32e93fb7 DB |
1413 | static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx, |
1414 | struct bpf_elf_sec_data *data_relo, | |
1415 | struct bpf_elf_sec_data *data_insn) | |
11c39b5e | 1416 | { |
32e93fb7 DB |
1417 | Elf_Data *idata = data_insn->sec_data; |
1418 | GElf_Shdr *rhdr = &data_relo->sec_hdr; | |
1419 | int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize; | |
1420 | struct bpf_insn *insns = idata->d_buf; | |
1421 | unsigned int num_insns = idata->d_size / sizeof(*insns); | |
11c39b5e | 1422 | |
32e93fb7 DB |
1423 | for (relo_ent = 0; relo_ent < relo_num; relo_ent++) { |
1424 | unsigned int ioff, rmap; | |
1425 | GElf_Rel relo; | |
1426 | GElf_Sym sym; | |
1427 | ||
1428 | if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo) | |
1429 | return -EIO; | |
1430 | ||
1431 | ioff = relo.r_offset / sizeof(struct bpf_insn); | |
1432 | if (ioff >= num_insns || | |
a576c6b9 | 1433 | insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) { |
32a121cb | 1434 | fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n", |
a576c6b9 DB |
1435 | ioff); |
1436 | if (ioff < num_insns && | |
1437 | insns[ioff].code == (BPF_JMP | BPF_CALL)) | |
32a121cb | 1438 | fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n"); |
32e93fb7 | 1439 | return -EINVAL; |
a576c6b9 | 1440 | } |
32e93fb7 DB |
1441 | |
1442 | if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym) | |
1443 | return -EIO; | |
2486337a | 1444 | if (sym.st_shndx != ctx->sec_maps) { |
32a121cb | 1445 | fprintf(stderr, "ELF contains non-map related relo data in entry %u pointing to section %u! Compiler bug?!\n", |
2486337a DB |
1446 | relo_ent, sym.st_shndx); |
1447 | return -EIO; | |
1448 | } | |
32e93fb7 DB |
1449 | |
1450 | rmap = sym.st_value / sizeof(struct bpf_elf_map); | |
1451 | if (rmap >= ARRAY_SIZE(ctx->map_fds)) | |
1452 | return -EINVAL; | |
1453 | if (!ctx->map_fds[rmap]) | |
1454 | return -EINVAL; | |
1455 | ||
1456 | if (ctx->verbose) | |
32a121cb | 1457 | fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n", |
32e93fb7 DB |
1458 | bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap], |
1459 | data_insn->sec_name, ioff); | |
11c39b5e | 1460 | |
32e93fb7 DB |
1461 | insns[ioff].src_reg = BPF_PSEUDO_MAP_FD; |
1462 | insns[ioff].imm = ctx->map_fds[rmap]; | |
1463 | } | |
1464 | ||
1465 | return 0; | |
1466 | } | |
1467 | ||
afc1a200 DB |
1468 | static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section, |
1469 | bool *lderr) | |
32e93fb7 DB |
1470 | { |
1471 | struct bpf_elf_sec_data data_relo, data_insn; | |
1472 | struct bpf_elf_prog prog; | |
1473 | int ret, idx, i, fd = -1; | |
1474 | ||
1475 | for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { | |
1476 | ret = bpf_fill_section_data(ctx, i, &data_relo); | |
1477 | if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL) | |
11c39b5e DB |
1478 | continue; |
1479 | ||
32e93fb7 DB |
1480 | idx = data_relo.sec_hdr.sh_info; |
1481 | ret = bpf_fill_section_data(ctx, idx, &data_insn); | |
cce3d466 DB |
1482 | if (ret < 0 || |
1483 | !(data_insn.sec_hdr.sh_type == SHT_PROGBITS && | |
1484 | data_insn.sec_hdr.sh_flags & SHF_EXECINSTR && | |
1485 | !strcmp(data_insn.sec_name, section))) | |
11c39b5e | 1486 | continue; |
32e93fb7 DB |
1487 | |
1488 | ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn); | |
1489 | if (ret < 0) | |
11c39b5e DB |
1490 | continue; |
1491 | ||
32e93fb7 DB |
1492 | memset(&prog, 0, sizeof(prog)); |
1493 | prog.type = ctx->type; | |
1494 | prog.insns = data_insn.sec_data->d_buf; | |
1495 | prog.size = data_insn.sec_data->d_size; | |
1496 | prog.license = ctx->license; | |
1497 | ||
f31645d1 | 1498 | fd = bpf_prog_attach(section, &prog, ctx); |
afc1a200 DB |
1499 | if (fd < 0) { |
1500 | *lderr = true; | |
1501 | break; | |
1502 | } | |
11c39b5e | 1503 | |
32e93fb7 DB |
1504 | ctx->sec_done[i] = true; |
1505 | ctx->sec_done[idx] = true; | |
11c39b5e DB |
1506 | break; |
1507 | } | |
1508 | ||
32e93fb7 | 1509 | return fd; |
11c39b5e DB |
1510 | } |
1511 | ||
/*
 * Load the program in section @section.
 *
 * With map data available the relocating loader is tried first. The
 * plain loader is used when there is no map data, or when the relocating
 * loader failed without having attempted a load.
 *
 * Returns the program fd, or a negative value on failure.
 */
static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
{
	bool load_failed = false;
	int fd;

	if (bpf_has_map_data(ctx)) {
		fd = bpf_fetch_prog_relo(ctx, section, &load_failed);
		if (fd >= 0 || load_failed)
			return fd;
	}

	return bpf_fetch_prog(ctx, section);
}
1524 | ||
910b543d DB |
1525 | static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id) |
1526 | { | |
1527 | int i; | |
1528 | ||
1529 | for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) | |
1530 | if (ctx->map_fds[i] && ctx->maps[i].id == id && | |
1531 | ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) | |
1532 | return i; | |
1533 | return -1; | |
1534 | } | |
1535 | ||
32e93fb7 | 1536 | static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx) |
473d7840 | 1537 | { |
32e93fb7 DB |
1538 | struct bpf_elf_sec_data data; |
1539 | uint32_t map_id, key_id; | |
910b543d | 1540 | int fd, i, ret, idx; |
473d7840 | 1541 | |
32e93fb7 DB |
1542 | for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { |
1543 | if (ctx->sec_done[i]) | |
473d7840 DB |
1544 | continue; |
1545 | ||
32e93fb7 | 1546 | ret = bpf_fill_section_data(ctx, i, &data); |
473d7840 DB |
1547 | if (ret < 0) |
1548 | continue; | |
1549 | ||
910b543d DB |
1550 | ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id); |
1551 | if (ret != 2) | |
32e93fb7 | 1552 | continue; |
910b543d DB |
1553 | |
1554 | idx = bpf_find_map_by_id(ctx, map_id); | |
1555 | if (idx < 0) | |
473d7840 DB |
1556 | continue; |
1557 | ||
32e93fb7 DB |
1558 | fd = bpf_fetch_prog_sec(ctx, data.sec_name); |
1559 | if (fd < 0) | |
473d7840 DB |
1560 | return -EIO; |
1561 | ||
910b543d DB |
1562 | ret = bpf_map_update(ctx->map_fds[idx], &key_id, |
1563 | &fd, BPF_ANY); | |
afc1a200 DB |
1564 | if (ret < 0) { |
1565 | if (errno == E2BIG) | |
1566 | fprintf(stderr, "Tail call key %u for map %u out of bounds?\n", | |
1567 | key_id, map_id); | |
1568 | return -errno; | |
1569 | } | |
473d7840 | 1570 | |
32e93fb7 | 1571 | ctx->sec_done[i] = true; |
473d7840 DB |
1572 | } |
1573 | ||
1574 | return 0; | |
1575 | } | |
1576 | ||
32e93fb7 | 1577 | static void bpf_save_finfo(struct bpf_elf_ctx *ctx) |
11c39b5e | 1578 | { |
32e93fb7 DB |
1579 | struct stat st; |
1580 | int ret; | |
11c39b5e | 1581 | |
32e93fb7 | 1582 | memset(&ctx->stat, 0, sizeof(ctx->stat)); |
11c39b5e | 1583 | |
32e93fb7 DB |
1584 | ret = fstat(ctx->obj_fd, &st); |
1585 | if (ret < 0) { | |
1586 | fprintf(stderr, "Stat of elf file failed: %s\n", | |
1587 | strerror(errno)); | |
1588 | return; | |
1589 | } | |
11c39b5e | 1590 | |
32e93fb7 DB |
1591 | ctx->stat.st_dev = st.st_dev; |
1592 | ctx->stat.st_ino = st.st_ino; | |
1593 | } | |
1594 | ||
f6793eec DB |
/*
 * Read the next "<id> <path>" mapping from the pinning database @fp.
 *
 * Leading blanks and tabs are ignored; empty lines and lines starting
 * with '#' are skipped.
 *
 * Returns 1 with *@id and @path filled in, 0 at end of file, or -1 for a
 * line that cannot be parsed, in which case @path receives the offending
 * line (without leading blanks).
 */
static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path)
{
	char line[PATH_MAX];

	while (fgets(line, sizeof(line), fp)) {
		const char *start = line + strspn(line, " \t");

		switch (*start) {
		case '#':
		case '\n':
		case '\0':
			continue;
		}

		if (sscanf(start, "%i %s\n", id, path) == 2)
			return 1;
		if (sscanf(start, "%i %s #", id, path) == 2)
			return 1;

		strcpy(path, start);
		return -1;
	}

	return 0;
}
1619 | ||
1620 | static bool bpf_pinning_reserved(uint32_t pinning) | |
1621 | { | |
1622 | switch (pinning) { | |
1623 | case PIN_NONE: | |
1624 | case PIN_OBJECT_NS: | |
1625 | case PIN_GLOBAL_NS: | |
1626 | return true; | |
1627 | default: | |
1628 | return false; | |
1629 | } | |
1630 | } | |
1631 | ||
1632 | static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file) | |
1633 | { | |
1634 | struct bpf_hash_entry *entry; | |
1635 | char subpath[PATH_MAX]; | |
1636 | uint32_t pinning; | |
1637 | FILE *fp; | |
1638 | int ret; | |
1639 | ||
1640 | fp = fopen(db_file, "r"); | |
1641 | if (!fp) | |
1642 | return; | |
1643 | ||
1644 | memset(subpath, 0, sizeof(subpath)); | |
1645 | while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) { | |
1646 | if (ret == -1) { | |
1647 | fprintf(stderr, "Database %s is corrupted at: %s\n", | |
1648 | db_file, subpath); | |
1649 | fclose(fp); | |
1650 | return; | |
1651 | } | |
1652 | ||
1653 | if (bpf_pinning_reserved(pinning)) { | |
32a121cb SH |
1654 | fprintf(stderr, "Database %s, id %u is reserved - ignoring!\n", |
1655 | db_file, pinning); | |
f6793eec DB |
1656 | continue; |
1657 | } | |
1658 | ||
1659 | entry = malloc(sizeof(*entry)); | |
1660 | if (!entry) { | |
1661 | fprintf(stderr, "No memory left for db entry!\n"); | |
1662 | continue; | |
1663 | } | |
1664 | ||
1665 | entry->pinning = pinning; | |
1666 | entry->subpath = strdup(subpath); | |
1667 | if (!entry->subpath) { | |
1668 | fprintf(stderr, "No memory left for db entry!\n"); | |
1669 | free(entry); | |
1670 | continue; | |
1671 | } | |
1672 | ||
1673 | entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)]; | |
1674 | ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry; | |
1675 | } | |
1676 | ||
1677 | fclose(fp); | |
1678 | } | |
1679 | ||
1680 | static void bpf_hash_destroy(struct bpf_elf_ctx *ctx) | |
1681 | { | |
1682 | struct bpf_hash_entry *entry; | |
1683 | int i; | |
1684 | ||
1685 | for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) { | |
1686 | while ((entry = ctx->ht[i]) != NULL) { | |
1687 | ctx->ht[i] = entry->next; | |
1688 | free((char *)entry->subpath); | |
1689 | free(entry); | |
1690 | } | |
1691 | } | |
1692 | } | |
1693 | ||
8187b012 DB |
1694 | static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx) |
1695 | { | |
1696 | if (ctx->elf_hdr.e_type != ET_REL || | |
e77fa41d DB |
1697 | (ctx->elf_hdr.e_machine != EM_NONE && |
1698 | ctx->elf_hdr.e_machine != EM_BPF) || | |
8187b012 DB |
1699 | ctx->elf_hdr.e_version != EV_CURRENT) { |
1700 | fprintf(stderr, "ELF format error, ELF file not for eBPF?\n"); | |
1701 | return -EINVAL; | |
1702 | } | |
1703 | ||
1704 | switch (ctx->elf_hdr.e_ident[EI_DATA]) { | |
1705 | default: | |
1706 | fprintf(stderr, "ELF format error, wrong endianness info?\n"); | |
1707 | return -EINVAL; | |
1708 | case ELFDATA2LSB: | |
1709 | if (htons(1) == 1) { | |
1710 | fprintf(stderr, | |
1711 | "We are big endian, eBPF object is little endian!\n"); | |
1712 | return -EIO; | |
1713 | } | |
1714 | break; | |
1715 | case ELFDATA2MSB: | |
1716 | if (htons(1) != 1) { | |
1717 | fprintf(stderr, | |
1718 | "We are little endian, eBPF object is big endian!\n"); | |
1719 | return -EIO; | |
1720 | } | |
1721 | break; | |
1722 | } | |
1723 | ||
1724 | return 0; | |
1725 | } | |
1726 | ||
32e93fb7 DB |
1727 | static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname, |
1728 | enum bpf_prog_type type, bool verbose) | |
1729 | { | |
1730 | int ret = -EINVAL; | |
1731 | ||
1732 | if (elf_version(EV_CURRENT) == EV_NONE || | |
1733 | bpf_init_env(pathname)) | |
1734 | return ret; | |
1735 | ||
1736 | memset(ctx, 0, sizeof(*ctx)); | |
1737 | ctx->verbose = verbose; | |
1738 | ctx->type = type; | |
1739 | ||
1740 | ctx->obj_fd = open(pathname, O_RDONLY); | |
1741 | if (ctx->obj_fd < 0) | |
1742 | return ctx->obj_fd; | |
1743 | ||
1744 | ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL); | |
1745 | if (!ctx->elf_fd) { | |
11c39b5e | 1746 | ret = -EINVAL; |
32e93fb7 | 1747 | goto out_fd; |
11c39b5e DB |
1748 | } |
1749 | ||
8187b012 DB |
1750 | if (elf_kind(ctx->elf_fd) != ELF_K_ELF) { |
1751 | ret = -EINVAL; | |
1752 | goto out_fd; | |
1753 | } | |
1754 | ||
32e93fb7 DB |
1755 | if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) != |
1756 | &ctx->elf_hdr) { | |
11c39b5e DB |
1757 | ret = -EIO; |
1758 | goto out_elf; | |
1759 | } | |
1760 | ||
8187b012 DB |
1761 | ret = bpf_elf_check_ehdr(ctx); |
1762 | if (ret < 0) | |
1763 | goto out_elf; | |
1764 | ||
32e93fb7 DB |
1765 | ctx->sec_done = calloc(ctx->elf_hdr.e_shnum, |
1766 | sizeof(*(ctx->sec_done))); | |
1767 | if (!ctx->sec_done) { | |
11c39b5e DB |
1768 | ret = -ENOMEM; |
1769 | goto out_elf; | |
1770 | } | |
1771 | ||
f31645d1 DB |
1772 | if (ctx->verbose && bpf_log_realloc(ctx)) { |
1773 | ret = -ENOMEM; | |
1774 | goto out_free; | |
1775 | } | |
1776 | ||
32e93fb7 | 1777 | bpf_save_finfo(ctx); |
f6793eec DB |
1778 | bpf_hash_init(ctx, CONFDIR "/bpf_pinning"); |
1779 | ||
32e93fb7 | 1780 | return 0; |
f31645d1 DB |
1781 | out_free: |
1782 | free(ctx->sec_done); | |
32e93fb7 DB |
1783 | out_elf: |
1784 | elf_end(ctx->elf_fd); | |
1785 | out_fd: | |
1786 | close(ctx->obj_fd); | |
1787 | return ret; | |
1788 | } | |
d937a74b | 1789 | |
32e93fb7 DB |
1790 | static int bpf_maps_count(struct bpf_elf_ctx *ctx) |
1791 | { | |
1792 | int i, count = 0; | |
11c39b5e | 1793 | |
32e93fb7 DB |
1794 | for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) { |
1795 | if (!ctx->map_fds[i]) | |
1796 | break; | |
1797 | count++; | |
1798 | } | |
473d7840 | 1799 | |
32e93fb7 DB |
1800 | return count; |
1801 | } | |
6256f8c9 | 1802 | |
32e93fb7 DB |
1803 | static void bpf_maps_teardown(struct bpf_elf_ctx *ctx) |
1804 | { | |
1805 | int i; | |
1806 | ||
1807 | for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) { | |
1808 | if (ctx->map_fds[i]) | |
1809 | close(ctx->map_fds[i]); | |
473d7840 | 1810 | } |
32e93fb7 DB |
1811 | } |
1812 | ||
1813 | static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure) | |
1814 | { | |
1815 | if (failure) | |
1816 | bpf_maps_teardown(ctx); | |
473d7840 | 1817 | |
f6793eec | 1818 | bpf_hash_destroy(ctx); |
f31645d1 | 1819 | |
32e93fb7 | 1820 | free(ctx->sec_done); |
f31645d1 DB |
1821 | free(ctx->log); |
1822 | ||
32e93fb7 DB |
1823 | elf_end(ctx->elf_fd); |
1824 | close(ctx->obj_fd); | |
1825 | } | |
6256f8c9 | 1826 | |
/*
 * File-scope ELF loader context. bpf_obj_open() initializes and uses it,
 * and bpf_send_map_fds() refers to the same instance afterwards.
 */
static struct bpf_elf_ctx __ctx;
6256f8c9 | 1828 | |
32e93fb7 DB |
1829 | static int bpf_obj_open(const char *pathname, enum bpf_prog_type type, |
1830 | const char *section, bool verbose) | |
1831 | { | |
1832 | struct bpf_elf_ctx *ctx = &__ctx; | |
1833 | int fd = 0, ret; | |
6256f8c9 | 1834 | |
32e93fb7 DB |
1835 | ret = bpf_elf_ctx_init(ctx, pathname, type, verbose); |
1836 | if (ret < 0) { | |
1837 | fprintf(stderr, "Cannot initialize ELF context!\n"); | |
1838 | return ret; | |
1839 | } | |
6256f8c9 | 1840 | |
32e93fb7 DB |
1841 | ret = bpf_fetch_ancillary(ctx); |
1842 | if (ret < 0) { | |
1843 | fprintf(stderr, "Error fetching ELF ancillary data!\n"); | |
1844 | goto out; | |
1845 | } | |
1846 | ||
1847 | fd = bpf_fetch_prog_sec(ctx, section); | |
1848 | if (fd < 0) { | |
1849 | fprintf(stderr, "Error fetching program/map!\n"); | |
1850 | ret = fd; | |
1851 | goto out; | |
1852 | } | |
1853 | ||
1854 | ret = bpf_fill_prog_arrays(ctx); | |
1855 | if (ret < 0) | |
1856 | fprintf(stderr, "Error filling program arrays!\n"); | |
11c39b5e | 1857 | out: |
32e93fb7 DB |
1858 | bpf_elf_ctx_destroy(ctx, ret < 0); |
1859 | if (ret < 0) { | |
1860 | if (fd) | |
1861 | close(fd); | |
1862 | return ret; | |
1863 | } | |
1864 | ||
1865 | return fd; | |
6256f8c9 | 1866 | } |
11c39b5e | 1867 | |
6256f8c9 | 1868 | static int |
4bd62446 DB |
1869 | bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len, |
1870 | const struct bpf_map_data *aux, unsigned int entries) | |
6256f8c9 DB |
1871 | { |
1872 | struct bpf_map_set_msg msg; | |
1873 | int *cmsg_buf, min_fd; | |
1874 | char *amsg_buf; | |
1875 | int i; | |
1876 | ||
1877 | memset(&msg, 0, sizeof(msg)); | |
1878 | ||
1879 | msg.aux.uds_ver = BPF_SCM_AUX_VER; | |
4bd62446 | 1880 | msg.aux.num_ent = entries; |
6256f8c9 DB |
1881 | |
1882 | strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name)); | |
1883 | memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st)); | |
1884 | ||
1885 | cmsg_buf = bpf_map_set_init(&msg, addr, addr_len); | |
1886 | amsg_buf = (char *)msg.aux.ent; | |
1887 | ||
4bd62446 | 1888 | for (i = 0; i < entries; i += min_fd) { |
6256f8c9 DB |
1889 | int ret; |
1890 | ||
4bd62446 | 1891 | min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i); |
6256f8c9 DB |
1892 | bpf_map_set_init_single(&msg, min_fd); |
1893 | ||
1894 | memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd); | |
1895 | memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd); | |
1896 | ||
1897 | ret = sendmsg(fd, &msg.hdr, 0); | |
1898 | if (ret <= 0) | |
1899 | return ret ? : -1; | |
1900 | } | |
1901 | ||
1902 | return 0; | |
11c39b5e DB |
1903 | } |
1904 | ||
4bd62446 DB |
1905 | static int |
1906 | bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux, | |
1907 | unsigned int entries) | |
1908 | { | |
1909 | struct bpf_map_set_msg msg; | |
1910 | int *cmsg_buf, min_fd; | |
1911 | char *amsg_buf, *mmsg_buf; | |
1912 | unsigned int needed = 1; | |
1913 | int i; | |
1914 | ||
1915 | cmsg_buf = bpf_map_set_init(&msg, NULL, 0); | |
1916 | amsg_buf = (char *)msg.aux.ent; | |
1917 | mmsg_buf = (char *)&msg.aux; | |
1918 | ||
1919 | for (i = 0; i < min(entries, needed); i += min_fd) { | |
1920 | struct cmsghdr *cmsg; | |
1921 | int ret; | |
1922 | ||
1923 | min_fd = min(entries, entries - i); | |
1924 | bpf_map_set_init_single(&msg, min_fd); | |
1925 | ||
1926 | ret = recvmsg(fd, &msg.hdr, 0); | |
1927 | if (ret <= 0) | |
1928 | return ret ? : -1; | |
1929 | ||
1930 | cmsg = CMSG_FIRSTHDR(&msg.hdr); | |
1931 | if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS) | |
1932 | return -EINVAL; | |
1933 | if (msg.hdr.msg_flags & MSG_CTRUNC) | |
1934 | return -EIO; | |
1935 | if (msg.aux.uds_ver != BPF_SCM_AUX_VER) | |
1936 | return -ENOSYS; | |
1937 | ||
1938 | min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd); | |
1939 | if (min_fd > entries || min_fd <= 0) | |
1940 | return -EINVAL; | |
1941 | ||
1942 | memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd); | |
1943 | memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd); | |
1944 | memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent)); | |
1945 | ||
1946 | needed = aux->num_ent; | |
1947 | } | |
1948 | ||
1949 | return 0; | |
1950 | } | |
1951 | ||
1952 | int bpf_send_map_fds(const char *path, const char *obj) | |
6256f8c9 | 1953 | { |
32e93fb7 | 1954 | struct bpf_elf_ctx *ctx = &__ctx; |
6256f8c9 DB |
1955 | struct sockaddr_un addr; |
1956 | struct bpf_map_data bpf_aux; | |
1957 | int fd, ret; | |
1958 | ||
1959 | fd = socket(AF_UNIX, SOCK_DGRAM, 0); | |
1960 | if (fd < 0) { | |
1961 | fprintf(stderr, "Cannot open socket: %s\n", | |
1962 | strerror(errno)); | |
1963 | return -1; | |
1964 | } | |
1965 | ||
1966 | memset(&addr, 0, sizeof(addr)); | |
1967 | addr.sun_family = AF_UNIX; | |
1968 | strncpy(addr.sun_path, path, sizeof(addr.sun_path)); | |
1969 | ||
1970 | ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr)); | |
1971 | if (ret < 0) { | |
1972 | fprintf(stderr, "Cannot connect to %s: %s\n", | |
1973 | path, strerror(errno)); | |
1974 | return -1; | |
1975 | } | |
1976 | ||
1977 | memset(&bpf_aux, 0, sizeof(bpf_aux)); | |
1978 | ||
32e93fb7 DB |
1979 | bpf_aux.fds = ctx->map_fds; |
1980 | bpf_aux.ent = ctx->maps; | |
1981 | bpf_aux.st = &ctx->stat; | |
6256f8c9 | 1982 | bpf_aux.obj = obj; |
6256f8c9 | 1983 | |
4bd62446 | 1984 | ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux, |
32e93fb7 | 1985 | bpf_maps_count(ctx)); |
6256f8c9 | 1986 | if (ret < 0) |
4bd62446 DB |
1987 | fprintf(stderr, "Cannot send fds to %s: %s\n", |
1988 | path, strerror(errno)); | |
1989 | ||
32e93fb7 | 1990 | bpf_maps_teardown(ctx); |
4bd62446 DB |
1991 | close(fd); |
1992 | return ret; | |
1993 | } | |
1994 | ||
/*
 * Bind a Unix domain datagram socket at @path and receive exported map
 * fds together with their auxiliary data from a sender.
 *
 * @path:     filesystem path to bind the socket to; it is unlinked again
 *            after the receive attempt
 * @fds:      output array with room for @entries file descriptors
 * @aux:      output for the aux header and map entries
 * @entries:  capacity of @fds
 *
 * Returns 0 on success and a negative value on failure; a diagnostic is
 * printed to stderr in the failure case.
 */
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
		     unsigned int entries)
{
	struct sockaddr_un addr;
	int fd, ret;

	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (fd < 0) {
		fprintf(stderr, "Cannot open socket: %s\n",
			strerror(errno));
		return -1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	/*
	 * addr is zeroed, so size - 1 keeps sun_path NUL-terminated; it is
	 * passed to unlink() as a C string further down.
	 */
	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);

	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0) {
		fprintf(stderr, "Cannot bind to socket: %s\n",
			strerror(errno));
		/* Do not leak the socket on the error path. */
		close(fd);
		return -1;
	}

	ret = bpf_map_set_recv(fd, fds, aux, entries);
	if (ret < 0)
		fprintf(stderr, "Cannot recv fds from %s: %s\n",
			path, strerror(errno));

	unlink(addr.sun_path);
	close(fd);
	return ret;
}
11c39b5e | 2028 | #endif /* HAVE_ELF */ |