]>
Commit | Line | Data |
---|---|---|
1cad0788 MK |
1 | // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) |
2 | ||
3 | /* | |
4 | * AF_XDP user-space access library. | |
5 | * | |
6 | * Copyright(c) 2018 - 2019 Intel Corporation. | |
7 | * | |
8 | * Author(s): Magnus Karlsson <magnus.karlsson@intel.com> | |
9 | */ | |
10 | ||
11 | #include <errno.h> | |
12 | #include <stdlib.h> | |
13 | #include <string.h> | |
14 | #include <unistd.h> | |
15 | #include <arpa/inet.h> | |
16 | #include <asm/barrier.h> | |
17 | #include <linux/compiler.h> | |
18 | #include <linux/ethtool.h> | |
19 | #include <linux/filter.h> | |
20 | #include <linux/if_ether.h> | |
21 | #include <linux/if_packet.h> | |
22 | #include <linux/if_xdp.h> | |
23 | #include <linux/sockios.h> | |
24 | #include <net/if.h> | |
25 | #include <sys/ioctl.h> | |
26 | #include <sys/mman.h> | |
27 | #include <sys/socket.h> | |
28 | #include <sys/types.h> | |
29 | ||
30 | #include "bpf.h" | |
31 | #include "libbpf.h" | |
d72386fe | 32 | #include "libbpf_internal.h" |
1cad0788 MK |
33 | #include "xsk.h" |
34 | ||
35 | #ifndef SOL_XDP | |
36 | #define SOL_XDP 283 | |
37 | #endif | |
38 | ||
39 | #ifndef AF_XDP | |
40 | #define AF_XDP 44 | |
41 | #endif | |
42 | ||
43 | #ifndef PF_XDP | |
44 | #define PF_XDP AF_XDP | |
45 | #endif | |
46 | ||
47 | struct xsk_umem { | |
48 | struct xsk_ring_prod *fill; | |
49 | struct xsk_ring_cons *comp; | |
50 | char *umem_area; | |
51 | struct xsk_umem_config config; | |
52 | int fd; | |
53 | int refcount; | |
54 | }; | |
55 | ||
56 | struct xsk_socket { | |
57 | struct xsk_ring_cons *rx; | |
58 | struct xsk_ring_prod *tx; | |
59 | __u64 outstanding_tx; | |
60 | struct xsk_umem *umem; | |
61 | struct xsk_socket_config config; | |
62 | int fd; | |
1cad0788 MK |
63 | int ifindex; |
64 | int prog_fd; | |
1cad0788 MK |
65 | int xsks_map_fd; |
66 | __u32 queue_id; | |
67 | char ifname[IFNAMSIZ]; | |
2761ed4b | 68 | bool zc; |
1cad0788 MK |
69 | }; |
70 | ||
/*
 * NOTE(review): nothing in the visible part of this file references this
 * structure; field meanings below are taken from the names only - confirm
 * before relying on them.
 */
struct xsk_nl_info {
	bool xdp_prog_attached;
	int ifindex;
	int fd;
};
76 | ||
77 | /* For 32-bit systems, we need to use mmap2 as the offsets are 64-bit. | |
78 | * Unfortunately, it is not part of glibc. | |
79 | */ | |
80 | static inline void *xsk_mmap(void *addr, size_t length, int prot, int flags, | |
81 | int fd, __u64 offset) | |
82 | { | |
83 | #ifdef __NR_mmap2 | |
84 | unsigned int page_shift = __builtin_ffs(getpagesize()) - 1; | |
85 | long ret = syscall(__NR_mmap2, addr, length, prot, flags, fd, | |
86 | (off_t)(offset >> page_shift)); | |
87 | ||
88 | return (void *)ret; | |
89 | #else | |
90 | return mmap(addr, length, prot, flags, fd, offset); | |
91 | #endif | |
92 | } | |
93 | ||
94 | int xsk_umem__fd(const struct xsk_umem *umem) | |
95 | { | |
96 | return umem ? umem->fd : -EINVAL; | |
97 | } | |
98 | ||
99 | int xsk_socket__fd(const struct xsk_socket *xsk) | |
100 | { | |
101 | return xsk ? xsk->fd : -EINVAL; | |
102 | } | |
103 | ||
/* Return true when @buffer starts on a page boundary. */
static bool xsk_page_aligned(void *buffer)
{
	unsigned long page_mask = getpagesize() - 1;

	return ((unsigned long)buffer & page_mask) == 0;
}
110 | ||
111 | static void xsk_set_umem_config(struct xsk_umem_config *cfg, | |
112 | const struct xsk_umem_config *usr_cfg) | |
113 | { | |
114 | if (!usr_cfg) { | |
115 | cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; | |
116 | cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; | |
117 | cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; | |
118 | cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; | |
119 | return; | |
120 | } | |
121 | ||
122 | cfg->fill_size = usr_cfg->fill_size; | |
123 | cfg->comp_size = usr_cfg->comp_size; | |
124 | cfg->frame_size = usr_cfg->frame_size; | |
125 | cfg->frame_headroom = usr_cfg->frame_headroom; | |
126 | } | |
127 | ||
6bf21b54 MK |
128 | static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, |
129 | const struct xsk_socket_config *usr_cfg) | |
1cad0788 MK |
130 | { |
131 | if (!usr_cfg) { | |
132 | cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; | |
133 | cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; | |
134 | cfg->libbpf_flags = 0; | |
135 | cfg->xdp_flags = 0; | |
136 | cfg->bind_flags = 0; | |
6bf21b54 | 137 | return 0; |
1cad0788 MK |
138 | } |
139 | ||
6bf21b54 MK |
140 | if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD) |
141 | return -EINVAL; | |
142 | ||
1cad0788 MK |
143 | cfg->rx_size = usr_cfg->rx_size; |
144 | cfg->tx_size = usr_cfg->tx_size; | |
145 | cfg->libbpf_flags = usr_cfg->libbpf_flags; | |
146 | cfg->xdp_flags = usr_cfg->xdp_flags; | |
147 | cfg->bind_flags = usr_cfg->bind_flags; | |
6bf21b54 MK |
148 | |
149 | return 0; | |
1cad0788 MK |
150 | } |
151 | ||
152 | int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, | |
153 | struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, | |
154 | const struct xsk_umem_config *usr_config) | |
155 | { | |
156 | struct xdp_mmap_offsets off; | |
157 | struct xdp_umem_reg mr; | |
158 | struct xsk_umem *umem; | |
159 | socklen_t optlen; | |
160 | void *map; | |
161 | int err; | |
162 | ||
163 | if (!umem_area || !umem_ptr || !fill || !comp) | |
164 | return -EFAULT; | |
165 | if (!size && !xsk_page_aligned(umem_area)) | |
166 | return -EINVAL; | |
167 | ||
168 | umem = calloc(1, sizeof(*umem)); | |
169 | if (!umem) | |
170 | return -ENOMEM; | |
171 | ||
172 | umem->fd = socket(AF_XDP, SOCK_RAW, 0); | |
173 | if (umem->fd < 0) { | |
174 | err = -errno; | |
175 | goto out_umem_alloc; | |
176 | } | |
177 | ||
178 | umem->umem_area = umem_area; | |
179 | xsk_set_umem_config(&umem->config, usr_config); | |
180 | ||
fec2f7ba | 181 | memset(&mr, 0, sizeof(mr)); |
1cad0788 MK |
182 | mr.addr = (uintptr_t)umem_area; |
183 | mr.len = size; | |
184 | mr.chunk_size = umem->config.frame_size; | |
185 | mr.headroom = umem->config.frame_headroom; | |
186 | ||
187 | err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)); | |
188 | if (err) { | |
189 | err = -errno; | |
190 | goto out_socket; | |
191 | } | |
192 | err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING, | |
193 | &umem->config.fill_size, | |
194 | sizeof(umem->config.fill_size)); | |
195 | if (err) { | |
196 | err = -errno; | |
197 | goto out_socket; | |
198 | } | |
199 | err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, | |
200 | &umem->config.comp_size, | |
201 | sizeof(umem->config.comp_size)); | |
202 | if (err) { | |
203 | err = -errno; | |
204 | goto out_socket; | |
205 | } | |
206 | ||
207 | optlen = sizeof(off); | |
208 | err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); | |
209 | if (err) { | |
210 | err = -errno; | |
211 | goto out_socket; | |
212 | } | |
213 | ||
214 | map = xsk_mmap(NULL, off.fr.desc + | |
215 | umem->config.fill_size * sizeof(__u64), | |
216 | PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, | |
217 | umem->fd, XDP_UMEM_PGOFF_FILL_RING); | |
218 | if (map == MAP_FAILED) { | |
219 | err = -errno; | |
220 | goto out_socket; | |
221 | } | |
222 | ||
223 | umem->fill = fill; | |
224 | fill->mask = umem->config.fill_size - 1; | |
225 | fill->size = umem->config.fill_size; | |
226 | fill->producer = map + off.fr.producer; | |
227 | fill->consumer = map + off.fr.consumer; | |
228 | fill->ring = map + off.fr.desc; | |
229 | fill->cached_cons = umem->config.fill_size; | |
230 | ||
231 | map = xsk_mmap(NULL, | |
232 | off.cr.desc + umem->config.comp_size * sizeof(__u64), | |
233 | PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, | |
234 | umem->fd, XDP_UMEM_PGOFF_COMPLETION_RING); | |
235 | if (map == MAP_FAILED) { | |
236 | err = -errno; | |
237 | goto out_mmap; | |
238 | } | |
239 | ||
240 | umem->comp = comp; | |
241 | comp->mask = umem->config.comp_size - 1; | |
242 | comp->size = umem->config.comp_size; | |
243 | comp->producer = map + off.cr.producer; | |
244 | comp->consumer = map + off.cr.consumer; | |
245 | comp->ring = map + off.cr.desc; | |
246 | ||
247 | *umem_ptr = umem; | |
248 | return 0; | |
249 | ||
250 | out_mmap: | |
0e6741f0 | 251 | munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64)); |
1cad0788 MK |
252 | out_socket: |
253 | close(umem->fd); | |
254 | out_umem_alloc: | |
255 | free(umem); | |
256 | return err; | |
257 | } | |
258 | ||
259 | static int xsk_load_xdp_prog(struct xsk_socket *xsk) | |
260 | { | |
50bd645b MK |
261 | static const int log_buf_size = 16 * 1024; |
262 | char log_buf[log_buf_size]; | |
1cad0788 MK |
263 | int err, prog_fd; |
264 | ||
265 | /* This is the C-program: | |
266 | * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) | |
267 | * { | |
10a13bb4 | 268 | * int index = ctx->rx_queue_index; |
1cad0788 MK |
269 | * |
270 | * // A set entry here means that the correspnding queue_id | |
271 | * // has an active AF_XDP socket bound to it. | |
10a13bb4 | 272 | * if (bpf_map_lookup_elem(&xsks_map, &index)) |
1cad0788 MK |
273 | * return bpf_redirect_map(&xsks_map, index, 0); |
274 | * | |
275 | * return XDP_PASS; | |
276 | * } | |
277 | */ | |
278 | struct bpf_insn prog[] = { | |
279 | /* r1 = *(u32 *)(r1 + 16) */ | |
280 | BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 16), | |
281 | /* *(u32 *)(r10 - 4) = r1 */ | |
282 | BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4), | |
283 | BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | |
284 | BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), | |
10a13bb4 | 285 | BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), |
1cad0788 MK |
286 | BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), |
287 | BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), | |
1cad0788 | 288 | BPF_MOV32_IMM(BPF_REG_0, 2), |
1cad0788 MK |
289 | /* if r1 == 0 goto +5 */ |
290 | BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), | |
291 | /* r2 = *(u32 *)(r10 - 4) */ | |
292 | BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), | |
293 | BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), | |
294 | BPF_MOV32_IMM(BPF_REG_3, 0), | |
295 | BPF_EMIT_CALL(BPF_FUNC_redirect_map), | |
296 | /* The jumps are to this instruction */ | |
297 | BPF_EXIT_INSN(), | |
298 | }; | |
299 | size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); | |
300 | ||
301 | prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt, | |
50bd645b MK |
302 | "LGPL-2.1 or BSD-2-Clause", 0, log_buf, |
303 | log_buf_size); | |
1cad0788 | 304 | if (prog_fd < 0) { |
50bd645b | 305 | pr_warning("BPF log buffer:\n%s", log_buf); |
1cad0788 MK |
306 | return prog_fd; |
307 | } | |
308 | ||
309 | err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags); | |
310 | if (err) { | |
311 | close(prog_fd); | |
312 | return err; | |
313 | } | |
314 | ||
315 | xsk->prog_fd = prog_fd; | |
316 | return 0; | |
317 | } | |
318 | ||
319 | static int xsk_get_max_queues(struct xsk_socket *xsk) | |
320 | { | |
decb705e IM |
321 | struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; |
322 | struct ifreq ifr = {}; | |
1cad0788 MK |
323 | int fd, err, ret; |
324 | ||
325 | fd = socket(AF_INET, SOCK_DGRAM, 0); | |
326 | if (fd < 0) | |
327 | return -errno; | |
328 | ||
1cad0788 | 329 | ifr.ifr_data = (void *)&channels; |
cb8ffde5 | 330 | memcpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1); |
cdfc7f88 | 331 | ifr.ifr_name[IFNAMSIZ - 1] = '\0'; |
1cad0788 MK |
332 | err = ioctl(fd, SIOCETHTOOL, &ifr); |
333 | if (err && errno != EOPNOTSUPP) { | |
334 | ret = -errno; | |
335 | goto out; | |
336 | } | |
337 | ||
decb705e | 338 | if (err || channels.max_combined == 0) |
1cad0788 MK |
339 | /* If the device says it has no channels, then all traffic |
340 | * is sent to a single stream, so max queues = 1. | |
341 | */ | |
342 | ret = 1; | |
343 | else | |
344 | ret = channels.max_combined; | |
345 | ||
346 | out: | |
347 | close(fd); | |
348 | return ret; | |
349 | } | |
350 | ||
351 | static int xsk_create_bpf_maps(struct xsk_socket *xsk) | |
352 | { | |
353 | int max_queues; | |
354 | int fd; | |
355 | ||
356 | max_queues = xsk_get_max_queues(xsk); | |
357 | if (max_queues < 0) | |
358 | return max_queues; | |
359 | ||
10a13bb4 | 360 | fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map", |
1cad0788 MK |
361 | sizeof(int), sizeof(int), max_queues, 0); |
362 | if (fd < 0) | |
363 | return fd; | |
1cad0788 | 364 | |
1cad0788 MK |
365 | xsk->xsks_map_fd = fd; |
366 | ||
367 | return 0; | |
368 | } | |
369 | ||
370 | static void xsk_delete_bpf_maps(struct xsk_socket *xsk) | |
371 | { | |
10a13bb4 | 372 | bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id); |
1cad0788 MK |
373 | close(xsk->xsks_map_fd); |
374 | } | |
375 | ||
5750902a | 376 | static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) |
1cad0788 | 377 | { |
5750902a BT |
378 | __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info); |
379 | __u32 map_len = sizeof(struct bpf_map_info); | |
1cad0788 | 380 | struct bpf_prog_info prog_info = {}; |
1cad0788 | 381 | struct bpf_map_info map_info; |
5750902a | 382 | int fd, err; |
1cad0788 MK |
383 | |
384 | err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); | |
385 | if (err) | |
386 | return err; | |
387 | ||
388 | num_maps = prog_info.nr_map_ids; | |
389 | ||
390 | map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids)); | |
391 | if (!map_ids) | |
392 | return -ENOMEM; | |
393 | ||
394 | memset(&prog_info, 0, prog_len); | |
395 | prog_info.nr_map_ids = num_maps; | |
396 | prog_info.map_ids = (__u64)(unsigned long)map_ids; | |
397 | ||
398 | err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); | |
399 | if (err) | |
400 | goto out_map_ids; | |
401 | ||
10a13bb4 | 402 | xsk->xsks_map_fd = -1; |
1cad0788 | 403 | |
10a13bb4 | 404 | for (i = 0; i < prog_info.nr_map_ids; i++) { |
1cad0788 | 405 | fd = bpf_map_get_fd_by_id(map_ids[i]); |
5750902a BT |
406 | if (fd < 0) |
407 | continue; | |
1cad0788 MK |
408 | |
409 | err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); | |
5750902a BT |
410 | if (err) { |
411 | close(fd); | |
412 | continue; | |
413 | } | |
1cad0788 | 414 | |
5750902a | 415 | if (!strcmp(map_info.name, "xsks_map")) { |
1cad0788 | 416 | xsk->xsks_map_fd = fd; |
5750902a | 417 | continue; |
1cad0788 MK |
418 | } |
419 | ||
5750902a | 420 | close(fd); |
1cad0788 MK |
421 | } |
422 | ||
5750902a | 423 | err = 0; |
10a13bb4 | 424 | if (xsk->xsks_map_fd == -1) |
1cad0788 | 425 | err = -ENOENT; |
1cad0788 | 426 | |
1cad0788 MK |
427 | out_map_ids: |
428 | free(map_ids); | |
429 | return err; | |
430 | } | |
431 | ||
5750902a BT |
432 | static int xsk_set_bpf_maps(struct xsk_socket *xsk) |
433 | { | |
10a13bb4 JL |
434 | return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, |
435 | &xsk->fd, 0); | |
5750902a BT |
436 | } |
437 | ||
1cad0788 MK |
438 | static int xsk_setup_xdp_prog(struct xsk_socket *xsk) |
439 | { | |
1cad0788 MK |
440 | __u32 prog_id = 0; |
441 | int err; | |
442 | ||
443 | err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id, | |
444 | xsk->config.xdp_flags); | |
445 | if (err) | |
446 | return err; | |
447 | ||
448 | if (!prog_id) { | |
1cad0788 MK |
449 | err = xsk_create_bpf_maps(xsk); |
450 | if (err) | |
451 | return err; | |
452 | ||
453 | err = xsk_load_xdp_prog(xsk); | |
10a13bb4 JL |
454 | if (err) { |
455 | xsk_delete_bpf_maps(xsk); | |
456 | return err; | |
457 | } | |
1cad0788 MK |
458 | } else { |
459 | xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); | |
43b9037a AN |
460 | if (xsk->prog_fd < 0) |
461 | return -errno; | |
5750902a | 462 | err = xsk_lookup_bpf_maps(xsk); |
10a13bb4 JL |
463 | if (err) { |
464 | close(xsk->prog_fd); | |
465 | return err; | |
466 | } | |
1cad0788 MK |
467 | } |
468 | ||
5750902a | 469 | err = xsk_set_bpf_maps(xsk); |
10a13bb4 JL |
470 | if (err) { |
471 | xsk_delete_bpf_maps(xsk); | |
472 | close(xsk->prog_fd); | |
473 | return err; | |
474 | } | |
1cad0788 MK |
475 | |
476 | return 0; | |
1cad0788 MK |
477 | } |
478 | ||
479 | int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, | |
480 | __u32 queue_id, struct xsk_umem *umem, | |
481 | struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, | |
482 | const struct xsk_socket_config *usr_config) | |
483 | { | |
0e6741f0 | 484 | void *rx_map = NULL, *tx_map = NULL; |
1cad0788 MK |
485 | struct sockaddr_xdp sxdp = {}; |
486 | struct xdp_mmap_offsets off; | |
2761ed4b | 487 | struct xdp_options opts; |
1cad0788 MK |
488 | struct xsk_socket *xsk; |
489 | socklen_t optlen; | |
1cad0788 MK |
490 | int err; |
491 | ||
492 | if (!umem || !xsk_ptr || !rx || !tx) | |
493 | return -EFAULT; | |
494 | ||
495 | if (umem->refcount) { | |
496 | pr_warning("Error: shared umems not supported by libbpf.\n"); | |
497 | return -EBUSY; | |
498 | } | |
499 | ||
500 | xsk = calloc(1, sizeof(*xsk)); | |
501 | if (!xsk) | |
502 | return -ENOMEM; | |
503 | ||
504 | if (umem->refcount++ > 0) { | |
505 | xsk->fd = socket(AF_XDP, SOCK_RAW, 0); | |
506 | if (xsk->fd < 0) { | |
507 | err = -errno; | |
508 | goto out_xsk_alloc; | |
509 | } | |
510 | } else { | |
511 | xsk->fd = umem->fd; | |
512 | } | |
513 | ||
514 | xsk->outstanding_tx = 0; | |
515 | xsk->queue_id = queue_id; | |
516 | xsk->umem = umem; | |
517 | xsk->ifindex = if_nametoindex(ifname); | |
518 | if (!xsk->ifindex) { | |
519 | err = -errno; | |
520 | goto out_socket; | |
521 | } | |
cb8ffde5 | 522 | memcpy(xsk->ifname, ifname, IFNAMSIZ - 1); |
763ff0e7 | 523 | xsk->ifname[IFNAMSIZ - 1] = '\0'; |
1cad0788 | 524 | |
6bf21b54 MK |
525 | err = xsk_set_xdp_socket_config(&xsk->config, usr_config); |
526 | if (err) | |
527 | goto out_socket; | |
1cad0788 MK |
528 | |
529 | if (rx) { | |
530 | err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, | |
531 | &xsk->config.rx_size, | |
532 | sizeof(xsk->config.rx_size)); | |
533 | if (err) { | |
534 | err = -errno; | |
535 | goto out_socket; | |
536 | } | |
537 | } | |
538 | if (tx) { | |
539 | err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING, | |
540 | &xsk->config.tx_size, | |
541 | sizeof(xsk->config.tx_size)); | |
542 | if (err) { | |
543 | err = -errno; | |
544 | goto out_socket; | |
545 | } | |
546 | } | |
547 | ||
548 | optlen = sizeof(off); | |
549 | err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); | |
550 | if (err) { | |
551 | err = -errno; | |
552 | goto out_socket; | |
553 | } | |
554 | ||
555 | if (rx) { | |
0e6741f0 BT |
556 | rx_map = xsk_mmap(NULL, off.rx.desc + |
557 | xsk->config.rx_size * sizeof(struct xdp_desc), | |
558 | PROT_READ | PROT_WRITE, | |
559 | MAP_SHARED | MAP_POPULATE, | |
560 | xsk->fd, XDP_PGOFF_RX_RING); | |
561 | if (rx_map == MAP_FAILED) { | |
1cad0788 MK |
562 | err = -errno; |
563 | goto out_socket; | |
564 | } | |
565 | ||
566 | rx->mask = xsk->config.rx_size - 1; | |
567 | rx->size = xsk->config.rx_size; | |
0e6741f0 BT |
568 | rx->producer = rx_map + off.rx.producer; |
569 | rx->consumer = rx_map + off.rx.consumer; | |
570 | rx->ring = rx_map + off.rx.desc; | |
1cad0788 MK |
571 | } |
572 | xsk->rx = rx; | |
573 | ||
574 | if (tx) { | |
0e6741f0 BT |
575 | tx_map = xsk_mmap(NULL, off.tx.desc + |
576 | xsk->config.tx_size * sizeof(struct xdp_desc), | |
577 | PROT_READ | PROT_WRITE, | |
578 | MAP_SHARED | MAP_POPULATE, | |
579 | xsk->fd, XDP_PGOFF_TX_RING); | |
580 | if (tx_map == MAP_FAILED) { | |
1cad0788 MK |
581 | err = -errno; |
582 | goto out_mmap_rx; | |
583 | } | |
584 | ||
585 | tx->mask = xsk->config.tx_size - 1; | |
586 | tx->size = xsk->config.tx_size; | |
0e6741f0 BT |
587 | tx->producer = tx_map + off.tx.producer; |
588 | tx->consumer = tx_map + off.tx.consumer; | |
589 | tx->ring = tx_map + off.tx.desc; | |
1cad0788 MK |
590 | tx->cached_cons = xsk->config.tx_size; |
591 | } | |
592 | xsk->tx = tx; | |
593 | ||
594 | sxdp.sxdp_family = PF_XDP; | |
595 | sxdp.sxdp_ifindex = xsk->ifindex; | |
596 | sxdp.sxdp_queue_id = xsk->queue_id; | |
597 | sxdp.sxdp_flags = xsk->config.bind_flags; | |
598 | ||
599 | err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp)); | |
600 | if (err) { | |
601 | err = -errno; | |
602 | goto out_mmap_tx; | |
603 | } | |
604 | ||
10a13bb4 | 605 | xsk->prog_fd = -1; |
2761ed4b MM |
606 | |
607 | optlen = sizeof(opts); | |
608 | err = getsockopt(xsk->fd, SOL_XDP, XDP_OPTIONS, &opts, &optlen); | |
609 | if (err) { | |
610 | err = -errno; | |
611 | goto out_mmap_tx; | |
612 | } | |
613 | ||
614 | xsk->zc = opts.flags & XDP_OPTIONS_ZEROCOPY; | |
615 | ||
1cad0788 MK |
616 | if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { |
617 | err = xsk_setup_xdp_prog(xsk); | |
618 | if (err) | |
619 | goto out_mmap_tx; | |
620 | } | |
621 | ||
622 | *xsk_ptr = xsk; | |
623 | return 0; | |
624 | ||
625 | out_mmap_tx: | |
626 | if (tx) | |
0e6741f0 | 627 | munmap(tx_map, off.tx.desc + |
1cad0788 MK |
628 | xsk->config.tx_size * sizeof(struct xdp_desc)); |
629 | out_mmap_rx: | |
630 | if (rx) | |
0e6741f0 | 631 | munmap(rx_map, off.rx.desc + |
1cad0788 MK |
632 | xsk->config.rx_size * sizeof(struct xdp_desc)); |
633 | out_socket: | |
634 | if (--umem->refcount) | |
635 | close(xsk->fd); | |
636 | out_xsk_alloc: | |
637 | free(xsk); | |
638 | return err; | |
639 | } | |
640 | ||
641 | int xsk_umem__delete(struct xsk_umem *umem) | |
642 | { | |
643 | struct xdp_mmap_offsets off; | |
644 | socklen_t optlen; | |
645 | int err; | |
646 | ||
647 | if (!umem) | |
648 | return 0; | |
649 | ||
650 | if (umem->refcount) | |
651 | return -EBUSY; | |
652 | ||
653 | optlen = sizeof(off); | |
654 | err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); | |
655 | if (!err) { | |
d24ed99b BT |
656 | munmap(umem->fill->ring - off.fr.desc, |
657 | off.fr.desc + umem->config.fill_size * sizeof(__u64)); | |
658 | munmap(umem->comp->ring - off.cr.desc, | |
659 | off.cr.desc + umem->config.comp_size * sizeof(__u64)); | |
1cad0788 MK |
660 | } |
661 | ||
662 | close(umem->fd); | |
663 | free(umem); | |
664 | ||
665 | return 0; | |
666 | } | |
667 | ||
668 | void xsk_socket__delete(struct xsk_socket *xsk) | |
669 | { | |
0e6741f0 | 670 | size_t desc_sz = sizeof(struct xdp_desc); |
1cad0788 MK |
671 | struct xdp_mmap_offsets off; |
672 | socklen_t optlen; | |
673 | int err; | |
674 | ||
675 | if (!xsk) | |
676 | return; | |
677 | ||
10a13bb4 JL |
678 | if (xsk->prog_fd != -1) { |
679 | xsk_delete_bpf_maps(xsk); | |
680 | close(xsk->prog_fd); | |
681 | } | |
1cad0788 MK |
682 | |
683 | optlen = sizeof(off); | |
684 | err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); | |
685 | if (!err) { | |
0e6741f0 | 686 | if (xsk->rx) { |
d24ed99b BT |
687 | munmap(xsk->rx->ring - off.rx.desc, |
688 | off.rx.desc + xsk->config.rx_size * desc_sz); | |
0e6741f0 BT |
689 | } |
690 | if (xsk->tx) { | |
d24ed99b BT |
691 | munmap(xsk->tx->ring - off.tx.desc, |
692 | off.tx.desc + xsk->config.tx_size * desc_sz); | |
0e6741f0 BT |
693 | } |
694 | ||
1cad0788 MK |
695 | } |
696 | ||
697 | xsk->umem->refcount--; | |
698 | /* Do not close an fd that also has an associated umem connected | |
699 | * to it. | |
700 | */ | |
701 | if (xsk->fd != xsk->umem->fd) | |
702 | close(xsk->fd); | |
703 | free(xsk); | |
704 | } |