git.proxmox.com Git - mirror_ubuntu-kernels.git/commitdiff
selftests/io_uring: test zerocopy send
author: Pavel Begunkov <asml.silence@gmail.com>
Tue, 12 Jul 2022 20:52:51 +0000 (21:52 +0100)
committer: Jens Axboe <axboe@kernel.dk>
Mon, 25 Jul 2022 00:41:07 +0000 (18:41 -0600)
Add selftests for io_uring zerocopy sends and io_uring's notification
infrastructure. It's largely influenced by msg_zerocopy and uses it on
the receive side.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/03d5ec78061cf52db420f88ed0b48eb8f47ce9f7.1657643355.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/io_uring_zerocopy_tx.c [new file with mode: 0644]
tools/testing/selftests/net/io_uring_zerocopy_tx.sh [new file with mode: 0755]

index db05b3764b7716d0e4f5d21fe9d1d87759777973..9a4b30bd3a9ee7c070b5f3d42f81044eca2f9802 100644 (file)
@@ -59,6 +59,7 @@ TEST_GEN_FILES += toeplitz
 TEST_GEN_FILES += cmsg_sender
 TEST_GEN_FILES += stress_reuseport_listen
 TEST_PROGS += test_vxlan_vnifiltering.sh
+TEST_GEN_FILES += io_uring_zerocopy_tx
 
 TEST_FILES := settings
 
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
new file mode 100644 (file)
index 0000000..9d64c56
--- /dev/null
@@ -0,0 +1,605 @@
+/* SPDX-License-Identifier: MIT */
+/* based on linux-kernel/tools/testing/selftests/net/msg_zerocopy.c */
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/io_uring.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+
+/*
+ * Values used to tell completions apart in do_tx(): NOTIF_TAG is the tag
+ * given to the registered notification slot, NONZC_TAG and ZC_TAG are stored
+ * in sqe->user_data of plain and zerocopy send requests respectively.
+ */
+#define NOTIF_TAG 0xfffffffULL
+#define NONZC_TAG 0
+#define ZC_TAG 1
+
+/* Send modes held in cfg_mode (set with -m). */
+enum {
+       MODE_NONZC      = 0,    /* plain IORING_OP_SEND */
+       MODE_ZC         = 1,    /* IORING_OP_SENDZC_NOTIF */
+       MODE_ZC_FIXED   = 2,    /* as MODE_ZC, plus IORING_RECVSEND_FIXED_BUF */
+       MODE_MIXED      = 3,    /* one of the three modes above, chosen with rand() per request */
+};
+
+/* Run configuration; the defaults below can be overridden in parse_opts(). */
+static bool cfg_flush          = false;
+static bool cfg_cork           = false;
+static int  cfg_mode           = MODE_ZC_FIXED;
+static int  cfg_nr_reqs                = 8;
+static int  cfg_family         = PF_UNSPEC;
+static int  cfg_payload_len;
+static int  cfg_port           = 8000;
+static int  cfg_runtime_ms     = 4200;
+
+/* Destination address and its length, filled in by parse_opts(). */
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+
+/* Send buffer; do_test() fills it with a repeating 'a'..'z' pattern. */
+static char payload[IP_MAXPACKET] __attribute__((aligned(4096)));
+
+/*
+ * Set of pointers describing a submission ring.
+ * NOTE(review): nothing in the visible part of this file references this
+ * struct (struct io_uring_sq is used instead) - confirm it can go.
+ */
+struct io_sq_ring {
+       unsigned *head;
+       unsigned *tail;
+       unsigned *ring_mask;
+       unsigned *ring_entries;
+       unsigned *flags;
+       unsigned *array;
+};
+
+/*
+ * Set of pointers describing a completion ring.
+ * NOTE(review): nothing in the visible part of this file references this
+ * struct (struct io_uring_cq is used instead) - confirm it can go.
+ */
+struct io_cq_ring {
+       unsigned *head;
+       unsigned *tail;
+       unsigned *ring_mask;
+       unsigned *ring_entries;
+       struct io_uring_cqe *cqes;
+};
+
+/*
+ * Submission queue state.  The k* pointers and array point into the SQ ring
+ * mapping and are set up by io_uring_mmap() from the offsets in
+ * io_uring_params.sq_off; sqes points at the separately mapped SQE array.
+ */
+struct io_uring_sq {
+       unsigned *khead;
+       unsigned *ktail;
+       unsigned *kring_mask;
+       unsigned *kring_entries;
+       unsigned *kflags;
+       unsigned *kdropped;
+       unsigned *array;
+       struct io_uring_sqe *sqes;
+
+       /*
+        * Local counters: sqe_tail advances when io_uring_get_sqe() hands out
+        * an SQE, sqe_head when io_uring_submit() places it in the ring array.
+        */
+       unsigned sqe_head;
+       unsigned sqe_tail;
+
+       /* length in bytes of the SQ ring mapping */
+       size_t ring_sz;
+};
+
+/*
+ * Completion queue state.  All pointers point into the CQ ring mapping and
+ * are set up by io_uring_mmap() from the offsets in io_uring_params.cq_off.
+ */
+struct io_uring_cq {
+       unsigned *khead;
+       unsigned *ktail;
+       unsigned *kring_mask;
+       unsigned *kring_entries;
+       unsigned *koverflow;
+       struct io_uring_cqe *cqes;
+
+       /* length in bytes of the CQ ring mapping */
+       size_t ring_sz;
+};
+
+/* One io_uring instance: both queues plus the fd returned by io_uring_setup(). */
+struct io_uring {
+       struct io_uring_sq sq;
+       struct io_uring_cq cq;
+       int ring_fd;
+};
+
+/*
+ * Fallback io_uring syscall numbers, used only when the included headers do
+ * not define them.  alpha gets its own numbers (535-537), every other
+ * architecture gets 425-427.
+ */
+#ifdef __alpha__
+# ifndef __NR_io_uring_setup
+#  define __NR_io_uring_setup          535
+# endif
+# ifndef __NR_io_uring_enter
+#  define __NR_io_uring_enter          536
+# endif
+# ifndef __NR_io_uring_register
+#  define __NR_io_uring_register       537
+# endif
+#else /* !__alpha__ */
+# ifndef __NR_io_uring_setup
+#  define __NR_io_uring_setup          425
+# endif
+# ifndef __NR_io_uring_enter
+#  define __NR_io_uring_enter          426
+# endif
+# ifndef __NR_io_uring_register
+#  define __NR_io_uring_register       427
+# endif
+#endif
+
+/*
+ * Barriers used around accesses to the ring head/tail words.  On x86 they
+ * expand to an empty asm statement with a "memory" clobber (a compiler
+ * barrier only); on every other architecture to __sync_synchronize().
+ */
+#if defined(__x86_64) || defined(__i386__)
+#define read_barrier() __asm__ __volatile__("":::"memory")
+#define write_barrier()        __asm__ __volatile__("":::"memory")
+#else
+
+#define read_barrier() __sync_synchronize()
+#define write_barrier()        __sync_synchronize()
+#endif
+
+/* Invoke the io_uring_setup syscall and hand back its result unchanged. */
+static int io_uring_setup(unsigned int entries, struct io_uring_params *p)
+{
+       int ring_fd = syscall(__NR_io_uring_setup, entries, p);
+
+       return ring_fd;
+}
+
+/*
+ * Invoke the io_uring_enter syscall and hand back its result unchanged.
+ * _NSIG / 8 is passed as the size argument that accompanies @sig.
+ */
+static int io_uring_enter(int fd, unsigned int to_submit,
+                         unsigned int min_complete,
+                         unsigned int flags, sigset_t *sig)
+{
+       const int sig_bytes = _NSIG / 8;
+       int rc;
+
+       rc = syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
+                    flags, sig, sig_bytes);
+       return rc;
+}
+
+/*
+ * Register @nr_iovecs buffers described by @iovecs with @ring.
+ * Returns the syscall result on success, -errno on failure.
+ */
+static int io_uring_register_buffers(struct io_uring *ring,
+                                    const struct iovec *iovecs,
+                                    unsigned nr_iovecs)
+{
+       int rc = syscall(__NR_io_uring_register, ring->ring_fd,
+                        IORING_REGISTER_BUFFERS, iovecs, nr_iovecs);
+
+       if (rc < 0)
+               return -errno;
+       return rc;
+}
+
+/*
+ * Register @nr notification slots from @slots with @ring.
+ * Returns the syscall result on success, -errno on failure.
+ */
+static int io_uring_register_notifications(struct io_uring *ring,
+                                          unsigned nr,
+                                          struct io_uring_notification_slot *slots)
+{
+       struct io_uring_notification_register reg;
+       int rc;
+
+       memset(&reg, 0, sizeof(reg));
+       reg.nr_slots = nr;
+       reg.data = (unsigned long)slots;
+
+       rc = syscall(__NR_io_uring_register, ring->ring_fd,
+                    IORING_REGISTER_NOTIFIERS, &reg, sizeof(reg));
+       if (rc < 0)
+               return -errno;
+       return rc;
+}
+
+/*
+ * Map the SQ ring, the SQE array and the CQ ring of io_uring instance @fd and
+ * fill @sq and @cq with pointers into those mappings, using the offsets the
+ * kernel reported in @p.
+ *
+ * Returns 0 on success.  If one of the mmap() calls fails, every mapping made
+ * so far is released again and -errno of the failing call is returned.
+ */
+static int io_uring_mmap(int fd, struct io_uring_params *p,
+                        struct io_uring_sq *sq, struct io_uring_cq *cq)
+{
+       void *sq_ring, *ptr;
+       size_t sqes_sz;
+       int ret;
+
+       sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
+       sq_ring = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
+                      MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
+       if (sq_ring == MAP_FAILED)
+               return -errno;
+       sq->khead = sq_ring + p->sq_off.head;
+       sq->ktail = sq_ring + p->sq_off.tail;
+       sq->kring_mask = sq_ring + p->sq_off.ring_mask;
+       sq->kring_entries = sq_ring + p->sq_off.ring_entries;
+       sq->kflags = sq_ring + p->sq_off.flags;
+       sq->kdropped = sq_ring + p->sq_off.dropped;
+       sq->array = sq_ring + p->sq_off.array;
+
+       sqes_sz = p->sq_entries * sizeof(struct io_uring_sqe);
+       sq->sqes = mmap(0, sqes_sz, PROT_READ | PROT_WRITE,
+                       MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
+       if (sq->sqes == MAP_FAILED) {
+               ret = -errno;
+               goto err_unmap_sq_ring;
+       }
+
+       cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
+       ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
+                  MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
+       if (ptr == MAP_FAILED) {
+               ret = -errno;
+               goto err_unmap_sqes;
+       }
+       cq->khead = ptr + p->cq_off.head;
+       cq->ktail = ptr + p->cq_off.tail;
+       cq->kring_mask = ptr + p->cq_off.ring_mask;
+       cq->kring_entries = ptr + p->cq_off.ring_entries;
+       cq->koverflow = ptr + p->cq_off.overflow;
+       cq->cqes = ptr + p->cq_off.cqes;
+       return 0;
+
+err_unmap_sqes:
+       munmap(sq->sqes, sqes_sz);
+err_unmap_sq_ring:
+       /*
+        * Unmap from the address mmap() returned: sq->khead is that address
+        * plus sq_off.head and is only the mapping base if that offset is 0.
+        */
+       munmap(sq_ring, sq->ring_sz);
+       return ret;
+}
+
+/*
+ * Create an io_uring instance with @entries submission entries and @flags,
+ * map its rings and initialise @ring.
+ *
+ * Returns 0 on success or -errno on failure; on failure no fd is left open.
+ */
+static int io_uring_queue_init(unsigned entries, struct io_uring *ring,
+                              unsigned flags)
+{
+       struct io_uring_params p;
+       int fd, ret;
+
+       memset(ring, 0, sizeof(*ring));
+       memset(&p, 0, sizeof(p));
+       p.flags = flags;
+
+       fd = io_uring_setup(entries, &p);
+       if (fd < 0)
+               /* raw syscall wrapper: the reason is in errno, not in fd */
+               return -errno;
+       ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq);
+       if (!ret)
+               ring->ring_fd = fd;
+       else
+               close(fd);
+       return ret;
+}
+
+/*
+ * Make the SQEs handed out by io_uring_get_sqe() visible to the kernel and
+ * call io_uring_enter() on them.
+ *
+ * Returns 0 when there is nothing to submit, otherwise the result of
+ * io_uring_enter(), with failures converted to -errno.
+ */
+static int io_uring_submit(struct io_uring *ring)
+{
+       struct io_uring_sq *sq = &ring->sq;
+       const unsigned mask = *sq->kring_mask;
+       unsigned ktail, submitted, to_submit;
+       int ret;
+
+       read_barrier();
+       /*
+        * The shared ring still holds entries between head and tail: do not
+        * add more, just enter the kernel asking for up to a full ring.
+        */
+       if (*sq->khead != *sq->ktail) {
+               submitted = *sq->kring_entries;
+               goto submit;
+       }
+       if (sq->sqe_head == sq->sqe_tail)
+               return 0;
+
+       /* put the index of every locally prepared SQE into the ring array */
+       ktail = *sq->ktail;
+       to_submit = sq->sqe_tail - sq->sqe_head;
+       for (submitted = 0; submitted < to_submit; submitted++) {
+               read_barrier();
+               sq->array[ktail++ & mask] = sq->sqe_head++ & mask;
+       }
+       if (!submitted)
+               return 0;
+
+       /* publish the new tail only after the array entries were written */
+       if (*sq->ktail != ktail) {
+               write_barrier();
+               *sq->ktail = ktail;
+               write_barrier();
+       }
+submit:
+       ret = io_uring_enter(ring->ring_fd, submitted, 0,
+                               IORING_ENTER_GETEVENTS, NULL);
+       return ret < 0 ? -errno : ret;
+}
+
+/*
+ * Turn @sqe into an IORING_OP_SEND request that sends @len bytes from @buf
+ * on @sockfd with message flags @flags.  All other SQE fields are zeroed.
+ */
+static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
+                                     const void *buf, size_t len, int flags)
+{
+       memset(sqe, 0, sizeof(*sqe));
+
+       sqe->fd = sockfd;
+       sqe->opcode = (__u8) IORING_OP_SEND;
+       sqe->msg_flags = (__u32) flags;
+       sqe->len = len;
+       sqe->addr = (unsigned long) buf;
+}
+
+/*
+ * Turn @sqe into an IORING_OP_SENDZC_NOTIF request: same fields as a plain
+ * send, plus the notification slot index @slot_idx and the zerocopy flags
+ * @zc_flags, which are carried in sqe->ioprio.
+ */
+static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd,
+                                       const void *buf, size_t len, int flags,
+                                       unsigned slot_idx, unsigned zc_flags)
+{
+       memset(sqe, 0, sizeof(*sqe));
+
+       sqe->opcode = (__u8) IORING_OP_SENDZC_NOTIF;
+       sqe->fd = sockfd;
+       sqe->addr = (unsigned long) buf;
+       sqe->len = len;
+       sqe->msg_flags = (__u32) flags;
+       sqe->notification_idx = slot_idx;
+       sqe->ioprio = zc_flags;
+}
+
+/*
+ * Hand out the next unused SQE of @ring, or NULL when taking one more would
+ * exceed the number of ring entries.
+ */
+static struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
+{
+       struct io_uring_sq *sq = &ring->sq;
+       unsigned pending = sq->sqe_tail - sq->sqe_head;
+       unsigned slot;
+
+       if (pending + 1 > *sq->kring_entries)
+               return NULL;
+
+       slot = sq->sqe_tail & *sq->kring_mask;
+       sq->sqe_tail++;
+       return &sq->sqes[slot];
+}
+
+/*
+ * Wait until the completion ring of @ring holds at least one entry and store
+ * a pointer to the entry at the current head in @cqe_ptr.  The head is not
+ * advanced; io_uring_cqe_seen() does that.
+ *
+ * Returns 0 on success, or -errno if io_uring_enter() fails, in which case
+ * *@cqe_ptr is NULL.
+ */
+static int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
+{
+       struct io_uring_cq *cq = &ring->cq;
+       const unsigned mask = *cq->kring_mask;
+       unsigned head = *cq->khead;
+
+       *cqe_ptr = NULL;
+       for (;;) {
+               read_barrier();
+               if (head != *cq->ktail)
+                       break;
+               /* ring is empty: block in the kernel for one completion */
+               if (io_uring_enter(ring->ring_fd, 0, 1,
+                                  IORING_ENTER_GETEVENTS, NULL) < 0)
+                       return -errno;
+       }
+
+       *cqe_ptr = &cq->cqes[head & mask];
+       return 0;
+}
+
+/* Mark one CQE as consumed by advancing the completion ring head by one. */
+static inline void io_uring_cqe_seen(struct io_uring *ring)
+{
+       struct io_uring_cq *cq = &ring->cq;
+
+       (*cq->khead)++;
+       write_barrier();
+}
+
+/* Current gettimeofday() time expressed in milliseconds. */
+static unsigned long gettimeofday_ms(void)
+{
+       struct timeval now;
+       unsigned long ms;
+
+       gettimeofday(&now, NULL);
+       ms = now.tv_sec * 1000;
+       ms += now.tv_usec / 1000;
+       return ms;
+}
+
+static void do_setsockopt(int fd, int level, int optname, int val)
+{
+       if (setsockopt(fd, level, optname, &val, sizeof(val)))
+               error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
+}
+
+/*
+ * Create the transmit socket, set SO_SNDBUF to 1 << 21 and connect it to
+ * cfg_dst_addr.  Returns the connected fd; any failure ends the program
+ * through error().
+ */
+static int do_setup_tx(int domain, int type, int protocol)
+{
+       int sock = socket(domain, type, protocol);
+
+       if (sock == -1)
+               error(1, errno, "socket t");
+
+       do_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, 1 << 21);
+
+       if (connect(sock, (void *) &cfg_dst_addr, cfg_alen) != 0)
+               error(1, errno, "connect");
+
+       return sock;
+}
+
+/*
+ * Transmit loop.  Connects a socket, sets up an io_uring instance with one
+ * notification slot and one registered buffer, then keeps submitting batches
+ * of cfg_nr_reqs sends (plain, zerocopy or fixed-buffer zerocopy, depending
+ * on cfg_mode) until cfg_runtime_ms has elapsed.  Throughput is printed to
+ * stderr; any failure ends the program through error().
+ *
+ * compl_cqes counts the notification CQEs that are still outstanding: it is
+ * incremented for every zerocopy request submitted with
+ * IORING_RECVSEND_NOTIF_FLUSH and decremented for every NOTIF_TAG completion
+ * and for every flushing request that failed.
+ */
+static void do_tx(int domain, int type, int protocol)
+{
+       struct io_uring_notification_slot b[1] = {{.tag = NOTIF_TAG}};
+       struct io_uring_sqe *sqe;
+       struct io_uring_cqe *cqe;
+       unsigned long packets = 0, bytes = 0;
+       unsigned long runtime_s;
+       struct io_uring ring;
+       struct iovec iov;
+       uint64_t tstop;
+       int i, fd, ret;
+       int compl_cqes = 0;
+
+       fd = do_setup_tx(domain, type, protocol);
+
+       /*
+        * The io_uring helpers report failures as negative values while
+        * error() expects a positive errnum, hence the negation below.
+        */
+       ret = io_uring_queue_init(512, &ring, 0);
+       if (ret)
+               error(1, -ret, "io_uring: queue init");
+
+       ret = io_uring_register_notifications(&ring, 1, b);
+       if (ret)
+               error(1, -ret, "io_uring: tx ctx registration");
+
+       iov.iov_base = payload;
+       iov.iov_len = cfg_payload_len;
+
+       ret = io_uring_register_buffers(&ring, &iov, 1);
+       if (ret)
+               error(1, -ret, "io_uring: buffer registration");
+
+       tstop = gettimeofday_ms() + cfg_runtime_ms;
+       do {
+               if (cfg_cork)
+                       do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
+
+               for (i = 0; i < cfg_nr_reqs; i++) {
+                       unsigned zc_flags = 0;
+                       unsigned buf_idx = 0;
+                       unsigned slot_idx = 0;
+                       unsigned mode = cfg_mode;
+                       unsigned msg_flags = 0;
+
+                       if (cfg_mode == MODE_MIXED)
+                               mode = rand() % 3;
+
+                       sqe = io_uring_get_sqe(&ring);
+                       /* NULL once the batch no longer fits into the ring */
+                       if (!sqe)
+                               error(1, 0, "io_uring: no free sqe, -n %d is too large",
+                                     cfg_nr_reqs);
+
+                       if (mode == MODE_NONZC) {
+                               io_uring_prep_send(sqe, fd, payload,
+                                                  cfg_payload_len, msg_flags);
+                               sqe->user_data = NONZC_TAG;
+                       } else {
+                               if (cfg_flush) {
+                                       zc_flags |= IORING_RECVSEND_NOTIF_FLUSH;
+                                       compl_cqes++;
+                               }
+                               io_uring_prep_sendzc(sqe, fd, payload,
+                                                    cfg_payload_len,
+                                                    msg_flags, slot_idx, zc_flags);
+                               if (mode == MODE_ZC_FIXED) {
+                                       sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
+                                       sqe->buf_index = buf_idx;
+                               }
+                               sqe->user_data = ZC_TAG;
+                       }
+               }
+
+               ret = io_uring_submit(&ring);
+               if (ret != cfg_nr_reqs)
+                       /* a short submit is not an errno, only ret < 0 is */
+                       error(1, ret < 0 ? -ret : 0, "submit");
+
+               for (i = 0; i < cfg_nr_reqs; i++) {
+                       ret = io_uring_wait_cqe(&ring, &cqe);
+                       if (ret)
+                               error(1, -ret, "wait cqe");
+
+                       if (cqe->user_data == NOTIF_TAG) {
+                               /* a notification does not count as a request completion */
+                               compl_cqes--;
+                               i--;
+                       } else if (cqe->user_data != NONZC_TAG &&
+                                  cqe->user_data != ZC_TAG) {
+                               error(1, 0, "invalid user_data");
+                       } else if (cqe->res <= 0 && cqe->res != -EAGAIN) {
+                               error(1, -cqe->res, "send failed");
+                       } else {
+                               if (cqe->res > 0) {
+                                       packets++;
+                                       bytes += cqe->res;
+                               }
+                               /* failed requests don't flush */
+                               if (cfg_flush &&
+                                   cqe->res <= 0 &&
+                                   cqe->user_data == ZC_TAG)
+                                       compl_cqes--;
+                       }
+                       io_uring_cqe_seen(&ring);
+               }
+               if (cfg_cork)
+                       do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
+       } while (gettimeofday_ms() < tstop);
+
+       if (close(fd))
+               error(1, errno, "close");
+
+       /*
+        * Rates are per whole second of configured runtime.  Clamp to 1 so a
+        * runtime below one second (e.g. -t 0) does not divide by zero.
+        */
+       runtime_s = cfg_runtime_ms >= 1000 ? cfg_runtime_ms / 1000 : 1;
+
+       fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n",
+                       packets, bytes >> 20,
+                       packets / runtime_s,
+                       (bytes >> 20) / runtime_s);
+
+       /* drain the notifications that are still outstanding */
+       while (compl_cqes) {
+               ret = io_uring_wait_cqe(&ring, &cqe);
+               if (ret)
+                       error(1, -ret, "wait cqe");
+               io_uring_cqe_seen(&ring);
+               compl_cqes--;
+       }
+}
+
+/* Fill the payload buffer with a repeating 'a'..'z' pattern and run do_tx(). */
+static void do_test(int domain, int type, int protocol)
+{
+       int pos = 0;
+
+       while (pos < IP_MAXPACKET) {
+               payload[pos] = 'a' + (pos % 26);
+               pos++;
+       }
+
+       do_tx(domain, type, protocol);
+}
+
+/*
+ * Print the command line synopsis and exit with status 1.  The option list
+ * mirrors the getopt() string in parse_opts() and the test names accepted by
+ * main().
+ */
+static void usage(const char *filepath)
+{
+       error(1, 0, "Usage: %s [-f] [-c<0|1>] [-m<mode>] [-n<N>] [-p<port>] "
+                   "[-s<payload size>] (-4|-6) [-t<time s>] -D<dst_ip> "
+                   "(tcp|udp)", filepath);
+}
+
+/*
+ * Parse the command line into the cfg_* globals and build cfg_dst_addr.
+ *
+ * Exactly one of -4 / -6 must be given, and exactly one non-option argument
+ * (the test name, which main() interprets) must follow the options.  Unknown
+ * options and out-of-range values end the program through usage() / error().
+ */
+static void parse_opts(int argc, char **argv)
+{
+       const int max_payload_len = sizeof(payload) -
+                                   sizeof(struct ipv6hdr) -
+                                   sizeof(struct tcphdr) -
+                                   40 /* max tcp options */;
+       struct sockaddr_in6 *addr6 = (void *) &cfg_dst_addr;
+       struct sockaddr_in *addr4 = (void *) &cfg_dst_addr;
+       char *daddr = NULL;
+       int c;
+
+       if (argc <= 1)
+               usage(argv[0]);
+       cfg_payload_len = max_payload_len;
+
+       while ((c = getopt(argc, argv, "46D:p:s:t:n:fc:m:")) != -1) {
+               switch (c) {
+               case '4':
+                       if (cfg_family != PF_UNSPEC)
+                               error(1, 0, "Pass one of -4 or -6");
+                       cfg_family = PF_INET;
+                       cfg_alen = sizeof(struct sockaddr_in);
+                       break;
+               case '6':
+                       if (cfg_family != PF_UNSPEC)
+                               error(1, 0, "Pass one of -4 or -6");
+                       cfg_family = PF_INET6;
+                       cfg_alen = sizeof(struct sockaddr_in6);
+                       break;
+               case 'D':
+                       daddr = optarg;
+                       break;
+               case 'p':
+                       cfg_port = strtoul(optarg, NULL, 0);
+                       break;
+               case 's':
+                       cfg_payload_len = strtoul(optarg, NULL, 0);
+                       break;
+               case 't':
+                       /* the configured seconds plus a fixed 200 ms */
+                       cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
+                       break;
+               case 'n':
+                       cfg_nr_reqs = strtoul(optarg, NULL, 0);
+                       break;
+               case 'f':
+                       cfg_flush = true;
+                       break;
+               case 'c':
+                       cfg_cork = strtol(optarg, NULL, 0);
+                       break;
+               case 'm':
+                       cfg_mode = strtol(optarg, NULL, 0);
+                       break;
+               default:
+                       /* getopt() has already reported the offending option */
+                       usage(argv[0]);
+               }
+       }
+
+       switch (cfg_family) {
+       case PF_INET:
+               memset(addr4, 0, sizeof(*addr4));
+               addr4->sin_family = AF_INET;
+               addr4->sin_port = htons(cfg_port);
+               if (daddr &&
+                   inet_pton(AF_INET, daddr, &(addr4->sin_addr)) != 1)
+                       error(1, 0, "ipv4 parse error: %s", daddr);
+               break;
+       case PF_INET6:
+               memset(addr6, 0, sizeof(*addr6));
+               addr6->sin6_family = AF_INET6;
+               addr6->sin6_port = htons(cfg_port);
+               if (daddr &&
+                   inet_pton(AF_INET6, daddr, &(addr6->sin6_addr)) != 1)
+                       error(1, 0, "ipv6 parse error: %s", daddr);
+               break;
+       default:
+               error(1, 0, "illegal domain");
+       }
+
+       if (cfg_payload_len > max_payload_len)
+               error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
+       if (cfg_payload_len <= 0)
+               error(1, 0, "-s: payload length must be positive");
+       if (cfg_nr_reqs <= 0)
+               error(1, 0, "-n: need at least one request per batch");
+       if (cfg_mode < MODE_NONZC || cfg_mode > MODE_MIXED)
+               error(1, 0, "-m: unknown mode %d", cfg_mode);
+       if (cfg_mode == MODE_NONZC && cfg_flush)
+               error(1, 0, "-f: only zerocopy modes support notifications");
+       if (optind != argc - 1)
+               usage(argv[0]);
+}
+
+/*
+ * Entry point: the last command line argument names the test ("tcp" or
+ * "udp"), everything before it is handled by parse_opts().
+ */
+int main(int argc, char **argv)
+{
+       const char *test_name = argv[argc - 1];
+       bool want_tcp, want_udp;
+
+       parse_opts(argc, argv);
+
+       want_tcp = strcmp(test_name, "tcp") == 0;
+       want_udp = !want_tcp && strcmp(test_name, "udp") == 0;
+
+       if (want_tcp)
+               do_test(cfg_family, SOCK_STREAM, 0);
+       else if (want_udp)
+               do_test(cfg_family, SOCK_DGRAM, 0);
+       else
+               error(1, 0, "unknown cfg_test %s", test_name);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
new file mode 100755 (executable)
index 0000000..6a65e44
--- /dev/null
@@ -0,0 +1,131 @@
+#!/bin/bash
+#
+# Send data between two processes across network namespaces.
+# Without arguments, run every combination of IP version, protocol and
+# send mode (1-3), each with three option variants.
+
+set -e
+
+readonly DEV="veth0"
+readonly DEV_MTU=65535
+readonly BIN_TX="./io_uring_zerocopy_tx"
+readonly BIN_RX="./msg_zerocopy"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+readonly path_sysctl_mem="net.core.optmem_max"
+
+# No arguments: automated test
+# The script re-invokes itself once per IP version / protocol / mode / variant.
+if [[ "$#" -eq "0" ]]; then
+       IPs=( "4" "6" )
+       protocols=( "tcp" "udp" )
+
+       for IP in "${IPs[@]}"; do
+               for proto in "${protocols[@]}"; do
+                       for mode in $(seq 1 3); do
+                               $0 "$IP" "$proto" -m "$mode" -t 1 -n 32
+                               $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -f
+                               # NOTE(review): the tx binary declares -c with a
+                               # mandatory argument (getopt string "c:"), so here
+                               # "-f" is taken as the value of -c instead of
+                               # enabling flush - confirm what was intended.
+                               $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -c -f
+                       done
+               done
+       done
+
+       echo "OK. All tests passed"
+       exit 0
+fi
+
+# Argument parsing
+if [[ "$#" -lt "2" ]]; then
+       echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
+       exit 1
+fi
+
+readonly IP="$1"
+shift
+readonly TXMODE="$1"
+shift
+readonly EXTRA_ARGS="$@"
+
+# Argument parsing: configure addresses
+case "${IP}" in
+4)
+       readonly SADDR="${SADDR4}"
+       readonly DADDR="${DADDR4}"
+       ;;
+6)
+       readonly SADDR="${SADDR6}"
+       readonly DADDR="${DADDR6}"
+       ;;
+*)
+       echo "Invalid IP version ${IP}"
+       exit 1
+       ;;
+esac
+
+# Argument parsing: select receive mode
+#
+# The receive mode equals the send mode, except for
+# - packet:      use raw recv, because packet receives skb clones
+# - raw_hdrincl: use raw recv, because hdrincl is a tx-only option
+if [[ "${TXMODE}" == 'packet' ||
+      "${TXMODE}" == 'packet_dgram' ||
+      "${TXMODE}" == 'raw_hdrincl' ]]; then
+       RXMODE='raw'
+else
+       RXMODE="${TXMODE}"
+fi
+
+# Start of state changes: install cleanup handler
+# Remember the current sysctl value so cleanup() can put it back
+save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})"
+
+# Delete both namespaces and restore the saved sysctl value
+cleanup() {
+       ip netns del "${NS2}"
+       ip netns del "${NS1}"
+       sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}"
+}
+
+# Run cleanup whenever the script exits
+trap cleanup EXIT
+
+# Configure system settings
+sysctl -w -q "${path_sysctl_mem}=1000000"
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
+  peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices, taken from the address constants
+# so they cannot drift from what the tx/rx binaries are told to use
+ip -netns "${NS1}" addr add "${SADDR4}/24" dev "${DEV}"
+ip -netns "${NS2}" addr add "${DADDR4}/24" dev "${DEV}"
+ip -netns "${NS1}" addr add "${SADDR6}/64" dev "${DEV}" nodad
+ip -netns "${NS2}" addr add "${DADDR6}/64" dev "${DEV}" nodad
+
+# Optionally disable sg or csum offload to test edge cases
+# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+
+do_test() {
+       local readonly ARGS="$1"
+
+       echo "ipv${IP} ${TXMODE} ${ARGS}"
+       ip netns exec "${NS2}" "${BIN_RX}" "-${IP}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" -r "${RXMODE}" &
+       sleep 0.2
+       ip netns exec "${NS1}" "${BIN_TX}" "-${IP}" -t 1 -D "${DADDR}" ${ARGS} "${TXMODE}"
+       wait
+}
+
+do_test "${EXTRA_ARGS}"
+echo ok