]> git.proxmox.com Git - mirror_iproute2.git/blob - tc/tc_bpf.c
tc: add eBPF support to f_bpf
[mirror_iproute2.git] / tc / tc_bpf.c
1 /*
2 * tc_bpf.c BPF common code
3 *
4 * This program is free software; you can distribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Daniel Borkmann <dborkman@redhat.com>
10 * Jiri Pirko <jiri@resnulli.us>
11 * Alexei Starovoitov <ast@plumgrid.com>
12 */
13
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <string.h>
18 #include <stdbool.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <stdarg.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <linux/filter.h>
25 #include <linux/netlink.h>
26 #include <linux/rtnetlink.h>
27
28 #ifdef HAVE_ELF
29 #include <libelf.h>
30 #include <gelf.h>
31 #endif
32
33 #include "utils.h"
34 #include "tc_util.h"
35 #include "tc_bpf.h"
36
/*
 * Obtain the textual form of a classic BPF program and decode the
 * instruction count that prefixes it.
 *
 * @arg:          the program text, or a file path when @from_file is set
 * @from_file:    read the text from the first line of the file @arg
 * @bpf_len:      receives the leading count parsed from the text
 * @bpf_string:   receives the text (either @arg or a heap buffer)
 * @need_release: set to true when *@bpf_string was allocated here and
 *                must be free()d by the caller
 * @separator:    character that has to follow the leading count
 *
 * Returns 0 on success and a negative errno value on failure. On failure
 * nothing is left for the caller to release: *@need_release is false and
 * *@bpf_string never points to freed memory.
 */
int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
		     char **bpf_string, bool *need_release,
		     const char separator)
{
	char sp;

	/* Give the out-parameters defined values on every exit path. */
	*need_release = false;
	*bpf_string = NULL;

	if (from_file) {
		size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
		char *tmp_string;
		FILE *fp;

		/* Room for the count prefix plus BPF_MAXINSNS encoded ops. */
		tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
		tmp_string = calloc(1, tmp_len);
		if (tmp_string == NULL)
			return -ENOMEM;

		fp = fopen(arg, "r");
		if (fp == NULL) {
			perror("Cannot fopen");
			free(tmp_string);
			return -ENOENT;
		}

		/* Only the first line of the file is consumed. */
		if (!fgets(tmp_string, tmp_len, fp)) {
			free(tmp_string);
			fclose(fp);
			return -EIO;
		}

		fclose(fp);

		*need_release = true;
		*bpf_string = tmp_string;
	} else {
		*bpf_string = arg;
	}

	if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 ||
	    sp != separator) {
		if (*need_release) {
			free(*bpf_string);
			/* Do not hand a dangling pointer back to the caller. */
			*bpf_string = NULL;
			*need_release = false;
		}
		return -EINVAL;
	}

	return 0;
}
86
/*
 * Decode a classic BPF program given as "<count>,<code> <jt> <jf> <k>,..."
 * into @bpf_ops.
 *
 * @argc/@argv: argv[0] holds the program text, or a file name when
 *              @from_file is set
 * @bpf_ops:    output array; the count is capped at BPF_MAXINSNS before
 *              any instruction is stored
 * @from_file:  forwarded to bpf_parse_string()
 *
 * Returns the number of instructions on success, -EINVAL on any error.
 */
int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops,
		  bool from_file)
{
	char *bpf_string, *token, separator = ',';
	int ret = 0, i = 0;
	bool need_release;
	__u16 bpf_len = 0;

	if (argc < 1)
		return -EINVAL;
	if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
			     &need_release, separator))
		return -EINVAL;
	if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Each instruction starts right after a separator; a trailing
	 * separator with nothing behind it ends the walk.
	 */
	token = bpf_string;
	while ((token = strchr(token, separator)) && (++token)[0]) {
		if (i >= bpf_len) {
			fprintf(stderr, "Real program length exceeds encoded "
				"length parameter!\n");
			ret = -EINVAL;
			goto out;
		}

		if (sscanf(token, "%hu %hhu %hhu %u,",
			   &bpf_ops[i].code, &bpf_ops[i].jt,
			   &bpf_ops[i].jf, &bpf_ops[i].k) != 4) {
			fprintf(stderr, "Error at instruction %d!\n", i);
			ret = -EINVAL;
			goto out;
		}

		i++;
	}

	if (i != bpf_len) {
		/* Keep the space before the line break of the literal. */
		fprintf(stderr, "Parsed program length is less than encoded "
			"length parameter!\n");
		ret = -EINVAL;
		goto out;
	}
	ret = bpf_len;

out:
	if (need_release)
		free(bpf_string);

	return ret;
}
139
/*
 * Print the classic BPF instructions carried in the attribute @bpf_ops
 * as a single quoted "bytecode" line: the count first, then one
 * "code jt jf k" tuple per instruction, comma separated.
 *
 * Nothing is printed when @len is zero.
 */
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
{
	const struct sock_filter *cur = RTA_DATA(bpf_ops);
	const struct sock_filter *last;

	if (!len)
		return;

	last = cur + (len - 1);

	fprintf(f, "bytecode \'%u,", len);

	/* Every instruction but the last is followed by a comma. */
	while (cur < last) {
		fprintf(f, "%hu %hhu %hhu %u,", cur->code, cur->jt,
			cur->jf, cur->k);
		cur++;
	}

	/* The last one closes the quote and ends the line. */
	fprintf(f, "%hu %hhu %hhu %u\'\n", last->code, last->jt,
		last->jf, last->k);
}
157
158 #ifdef HAVE_ELF
159 struct bpf_elf_sec_data {
160 GElf_Shdr sec_hdr;
161 char *sec_name;
162 Elf_Data *sec_data;
163 };
164
/*
 * Log buffer passed to the kernel by bpf_prog_load(); bpf_dump_error()
 * prints its contents and clears it again.
 */
static char bpf_log_buf[8 * 1024];
166
167 static const char *prog_type_section(enum bpf_prog_type type)
168 {
169 switch (type) {
170 case BPF_PROG_TYPE_SCHED_CLS:
171 return ELF_SECTION_CLASSIFIER;
172 /* case BPF_PROG_TYPE_SCHED_ACT: */
173 /* return ELF_SECTION_ACTION; */
174 default:
175 return NULL;
176 }
177 }
178
179 static void bpf_dump_error(const char *format, ...) __check_format_string(1, 2);
180 static void bpf_dump_error(const char *format, ...)
181 {
182 va_list vl;
183
184 va_start(vl, format);
185 vfprintf(stderr, format, vl);
186 va_end(vl);
187
188 fprintf(stderr, "%s", bpf_log_buf);
189 memset(bpf_log_buf, 0, sizeof(bpf_log_buf));
190 }
191
192 static int bpf_create_map(enum bpf_map_type type, unsigned int size_key,
193 unsigned int size_value, unsigned int max_elem)
194 {
195 union bpf_attr attr = {
196 .map_type = type,
197 .key_size = size_key,
198 .value_size = size_value,
199 .max_entries = max_elem,
200 };
201
202 return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
203 }
204
205 static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
206 unsigned int len, const char *license)
207 {
208 union bpf_attr attr = {
209 .prog_type = type,
210 .insns = bpf_ptr_to_u64(insns),
211 .insn_cnt = len / sizeof(struct bpf_insn),
212 .license = bpf_ptr_to_u64(license),
213 .log_buf = bpf_ptr_to_u64(bpf_log_buf),
214 .log_size = sizeof(bpf_log_buf),
215 .log_level = 1,
216 };
217
218 return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
219 }
220
221 static int bpf_prog_attach(enum bpf_prog_type type, const struct bpf_insn *insns,
222 unsigned int size, const char *license)
223 {
224 int prog_fd = bpf_prog_load(type, insns, size, license);
225
226 if (prog_fd < 0)
227 bpf_dump_error("BPF program rejected: %s\n", strerror(errno));
228
229 return prog_fd;
230 }
231
232 static int bpf_map_attach(enum bpf_map_type type, unsigned int size_key,
233 unsigned int size_value, unsigned int max_elem)
234 {
235 int map_fd = bpf_create_map(type, size_key, size_value, max_elem);
236
237 if (map_fd < 0)
238 bpf_dump_error("BPF map rejected: %s\n", strerror(errno));
239
240 return map_fd;
241 }
242
/* Mark the first @max_fds slots of @map_fds as unused (-1). */
static void bpf_maps_init(int *map_fds, unsigned int max_fds)
{
	int *slot = map_fds;
	unsigned int left = max_fds;

	while (left--)
		*slot++ = -1;
}
250
/*
 * Close every non-negative descriptor among the first @max_fds slots of
 * @map_fds. The array itself is left untouched.
 */
static void bpf_maps_destroy(const int *map_fds, unsigned int max_fds)
{
	const int *slot = map_fds;
	unsigned int left;

	for (left = max_fds; left > 0; left--, slot++) {
		if (*slot < 0)
			continue;
		close(*slot);
	}
}
260
261 static int bpf_maps_attach(struct bpf_elf_map *maps, unsigned int num_maps,
262 int *map_fds, unsigned int max_fds)
263 {
264 int i, ret;
265
266 for (i = 0; i < num_maps && num_maps <= max_fds; i++) {
267 struct bpf_elf_map *map = &maps[i];
268
269 ret = bpf_map_attach(map->type, map->size_key,
270 map->size_value, map->max_elem);
271 if (ret < 0)
272 goto err_unwind;
273
274 map_fds[i] = ret;
275 }
276
277 return 0;
278
279 err_unwind:
280 bpf_maps_destroy(map_fds, i);
281 return ret;
282 }
283
284 static int bpf_fill_section_data(Elf *elf_fd, GElf_Ehdr *elf_hdr, int sec_index,
285 struct bpf_elf_sec_data *sec_data)
286 {
287 GElf_Shdr sec_hdr;
288 Elf_Scn *sec_fd;
289 Elf_Data *sec_edata;
290 char *sec_name;
291
292 memset(sec_data, 0, sizeof(*sec_data));
293
294 sec_fd = elf_getscn(elf_fd, sec_index);
295 if (!sec_fd)
296 return -EINVAL;
297
298 if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
299 return -EIO;
300
301 sec_name = elf_strptr(elf_fd, elf_hdr->e_shstrndx,
302 sec_hdr.sh_name);
303 if (!sec_name || !sec_hdr.sh_size)
304 return -ENOENT;
305
306 sec_edata = elf_getdata(sec_fd, NULL);
307 if (!sec_edata || elf_getdata(sec_fd, sec_edata))
308 return -EIO;
309
310 memcpy(&sec_data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
311 sec_data->sec_name = sec_name;
312 sec_data->sec_data = sec_edata;
313
314 return 0;
315 }
316
317 static int bpf_apply_relo_data(struct bpf_elf_sec_data *data_relo,
318 struct bpf_elf_sec_data *data_insn,
319 Elf_Data *sym_tab, int *map_fds, int max_fds)
320 {
321 Elf_Data *idata = data_insn->sec_data;
322 GElf_Shdr *rhdr = &data_relo->sec_hdr;
323 int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
324 struct bpf_insn *insns = idata->d_buf;
325 unsigned int num_insns = idata->d_size / sizeof(*insns);
326
327 for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
328 unsigned int ioff, fnum;
329 GElf_Rel relo;
330 GElf_Sym sym;
331
332 if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
333 return -EIO;
334
335 ioff = relo.r_offset / sizeof(struct bpf_insn);
336 if (ioff >= num_insns)
337 return -EINVAL;
338 if (insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW))
339 return -EINVAL;
340
341 if (gelf_getsym(sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
342 return -EIO;
343
344 fnum = sym.st_value / sizeof(struct bpf_elf_map);
345 if (fnum >= max_fds)
346 return -EINVAL;
347
348 insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
349 insns[ioff].imm = map_fds[fnum];
350 }
351
352 return 0;
353 }
354
355 static int bpf_fetch_ancillary(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
356 int *map_fds, unsigned int max_fds,
357 char *license, unsigned int lic_len,
358 Elf_Data **sym_tab)
359 {
360 int sec_index, ret = -1;
361
362 for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
363 struct bpf_elf_sec_data data_anc;
364
365 ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
366 &data_anc);
367 if (ret < 0)
368 continue;
369
370 /* Extract and load eBPF map fds. */
371 if (!strcmp(data_anc.sec_name, ELF_SECTION_MAPS)) {
372 struct bpf_elf_map *maps = data_anc.sec_data->d_buf;
373 unsigned int maps_num = data_anc.sec_data->d_size /
374 sizeof(*maps);
375
376 sec_seen[sec_index] = true;
377 ret = bpf_maps_attach(maps, maps_num, map_fds,
378 max_fds);
379 if (ret < 0)
380 return ret;
381 }
382 /* Extract eBPF license. */
383 else if (!strcmp(data_anc.sec_name, ELF_SECTION_LICENSE)) {
384 if (data_anc.sec_data->d_size > lic_len)
385 return -ENOMEM;
386
387 sec_seen[sec_index] = true;
388 memcpy(license, data_anc.sec_data->d_buf,
389 data_anc.sec_data->d_size);
390 }
391 /* Extract symbol table for relocations (map fd fixups). */
392 else if (data_anc.sec_hdr.sh_type == SHT_SYMTAB) {
393 sec_seen[sec_index] = true;
394 *sym_tab = data_anc.sec_data;
395 }
396 }
397
398 return ret;
399 }
400
401 static int bpf_fetch_prog_relo(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
402 enum bpf_prog_type type, char *license,
403 Elf_Data *sym_tab, int *map_fds, unsigned int max_fds)
404 {
405 int sec_index, prog_fd = -1;
406
407 for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
408 struct bpf_elf_sec_data data_relo, data_insn;
409 int ins_index, ret;
410
411 /* Attach eBPF programs with relocation data (maps). */
412 ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
413 &data_relo);
414 if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
415 continue;
416
417 ins_index = data_relo.sec_hdr.sh_info;
418
419 ret = bpf_fill_section_data(elf_fd, elf_hdr, ins_index,
420 &data_insn);
421 if (ret < 0)
422 continue;
423 if (strcmp(data_insn.sec_name, prog_type_section(type)))
424 continue;
425
426 sec_seen[sec_index] = true;
427 sec_seen[ins_index] = true;
428
429 ret = bpf_apply_relo_data(&data_relo, &data_insn, sym_tab,
430 map_fds, max_fds);
431 if (ret < 0)
432 continue;
433
434 prog_fd = bpf_prog_attach(type, data_insn.sec_data->d_buf,
435 data_insn.sec_data->d_size, license);
436 if (prog_fd < 0)
437 continue;
438
439 break;
440 }
441
442 return prog_fd;
443 }
444
445 static int bpf_fetch_prog(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
446 enum bpf_prog_type type, char *license)
447 {
448 int sec_index, prog_fd = -1;
449
450 for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
451 struct bpf_elf_sec_data data_insn;
452 int ret;
453
454 /* Attach eBPF programs without relocation data. */
455 if (sec_seen[sec_index])
456 continue;
457
458 ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
459 &data_insn);
460 if (ret < 0)
461 continue;
462 if (strcmp(data_insn.sec_name, prog_type_section(type)))
463 continue;
464
465 prog_fd = bpf_prog_attach(type, data_insn.sec_data->d_buf,
466 data_insn.sec_data->d_size, license);
467 if (prog_fd < 0)
468 continue;
469
470 break;
471 }
472
473 return prog_fd;
474 }
475
476 int bpf_open_object(const char *path, enum bpf_prog_type type)
477 {
478 int map_fds[ELF_MAX_MAPS], max_fds = ARRAY_SIZE(map_fds);
479 char license[ELF_MAX_LICENSE_LEN];
480 int file_fd, prog_fd = -1, ret;
481 Elf_Data *sym_tab = NULL;
482 GElf_Ehdr elf_hdr;
483 bool *sec_seen;
484 Elf *elf_fd;
485
486 if (elf_version(EV_CURRENT) == EV_NONE)
487 return -EINVAL;
488
489 file_fd = open(path, O_RDONLY, 0);
490 if (file_fd < 0)
491 return -errno;
492
493 elf_fd = elf_begin(file_fd, ELF_C_READ, NULL);
494 if (!elf_fd) {
495 ret = -EINVAL;
496 goto out;
497 }
498
499 if (gelf_getehdr(elf_fd, &elf_hdr) != &elf_hdr) {
500 ret = -EIO;
501 goto out_elf;
502 }
503
504 sec_seen = calloc(elf_hdr.e_shnum, sizeof(*sec_seen));
505 if (!sec_seen) {
506 ret = -ENOMEM;
507 goto out_elf;
508 }
509
510 memset(license, 0, sizeof(license));
511 bpf_maps_init(map_fds, max_fds);
512
513 ret = bpf_fetch_ancillary(elf_fd, &elf_hdr, sec_seen, map_fds, max_fds,
514 license, sizeof(license), &sym_tab);
515 if (ret < 0)
516 goto out_maps;
517 if (sym_tab)
518 prog_fd = bpf_fetch_prog_relo(elf_fd, &elf_hdr, sec_seen, type,
519 license, sym_tab, map_fds, max_fds);
520 if (prog_fd < 0)
521 prog_fd = bpf_fetch_prog(elf_fd, &elf_hdr, sec_seen, type,
522 license);
523 if (prog_fd < 0)
524 goto out_maps;
525 out_sec:
526 free(sec_seen);
527 out_elf:
528 elf_end(elf_fd);
529 out:
530 close(file_fd);
531 return prog_fd;
532
533 out_maps:
534 bpf_maps_destroy(map_fds, max_fds);
535 goto out_sec;
536 }
537
538 #endif /* HAVE_ELF */