--- /dev/null
+.TH TC 8 "December 2019" "iproute2" "Linux"
+.SH NAME
+ETS \- Enhanced Transmission Selection scheduler
+.SH SYNOPSIS
+.B tc qdisc ... ets [ bands
+number
+.B ] [ strict
+number
+.B ] [ quanta
+bytes bytes bytes...
+.B ] [ priomap
+band band band...
+.B ]
+
+.B tc class ... ets [ quantum
+bytes
+.B ]
+
+.SH DESCRIPTION
+
+The Enhanced Transmission Selection scheduler is a classful queuing
+discipline that merges functionality of PRIO and DRR qdiscs in one
+scheduler. ETS makes it easy to configure a set of strict and
+bandwidth-sharing bands to implement the transmission selection described
+in 802.1Qaz.
+
+On creation with 'tc qdisc add', a fixed number of bands is created. Each
+band is a class, although it is not possible to directly add and remove
+bands with 'tc class' commands. The number of bands to be created must
+instead be specified on the command line as the qdisc is added.
+
+The minor number of classid to use when referring to a band is the band
+number increased by one. Thus band 0 will have classid of major:1, band 1
+that of major:2, etc.
+
+ETS bands are of two types: some number may be in strict mode, the
+remaining ones are in bandwidth-sharing mode.
+
+.SH ALGORITHM
+When dequeuing, strict bands are tried first, if there are any. Band 0 is
+tried first. If it did not deliver a packet, band 1 is tried next, and so
+on until one of the bands delivers a packet, or the strict bands are
+exhausted.
+
+If no packet has been dequeued from any of the strict bands, if there are
+any bandwidth-sharing bands, the dequeuing proceeds according to the DRR
+algorithm. Each bandwidth-sharing band is assigned a deficit counter,
+initialized to quantum assigned by a
+.B quanta
+element. ETS maintains an (internal) ''active'' list of bandwidth-sharing
+bands whose qdiscs are non-empty. This list is used for dequeuing. A packet
+is dequeued from the band at the head of the list if the packet size is
+smaller or equal to the deficit counter. If the counter is too small, it is
+increased by
+.B quantum
+and the scheduler moves on to the next band in the active list.
+
+Only qdiscs that own their queue should be added below the
+bandwidth-sharing bands. Attaching to them non-work-conserving qdiscs like
+TBF does not make sense \-\- other qdiscs in the active list will be
+skipped until the dequeue operation succeeds. This limitation does not
+exist with the strict bands.
+
+.SH CLASSIFICATION
+The ETS qdisc allows three ways to decide which band to enqueue a packet
+to:
+
+- Packet priority can be directly set to a class handle, in which case that
+ is the queue where the packet will be put. For example, band number 2 of
+ a qdisc with handle of 11: will have classid 11:3. To mark a packet for
+ queuing to this band, the packet priority should be set to 0x110003.
+
+- A tc filter attached to the qdisc can put the packet to a band by using
+ the \fBflowid\fR keyword.
+
+- As a last resort, the ETS qdisc consults its priomap (see below), which
+ maps packets to bands based on packet priority.
+
+.SH PARAMETERS
+.TP
+strict
+The number of bands that should be created in strict mode. If not given,
+this value is 0.
+
+.TP
+quanta
+Each bandwidth-sharing band needs to know its quantum, which is the amount
+of bytes a band is allowed to dequeue before the scheduler moves to the
+next bandwidth-sharing band. The
+.B quanta
+argument lists quanta for the individual bandwidth-sharing bands.
+The minimum value of each quantum is 1. If
+.B quanta
+is not given, the default is no bandwidth-sharing bands, but note that when
+specifying a large number of
+.B bands,
+the extra ones are in bandwidth-sharing mode by default.
+
+.TP
+bands
+Number of bands given explicitly. This value has to be at least large
+enough to cover the strict bands specified through the
+.B strict
+keyword and bandwidth-sharing bands specified in
+.B quanta.
+If a larger value is given, any extra bands are in bandwidth-sharing mode,
+and their quanta are deduced from the interface MTU. If no value is given,
+as many bands are created as necessary to cover all bands implied by the
+.B strict
+and
+.B quanta
+keywords.
+
+.TP
+priomap
+The priomap maps the priority of a packet to a band. The argument is a list
+of numbers. The first number indicates which band the packets with priority
+0 should be put to, the second is for priority 1, and so on.
+
+There can be up to 16 numbers in the list. If there are fewer, the default
+band that traffic with one of the unmentioned priorities goes to is the
+last one.
+
+.SH EXAMPLE & USAGE
+
+.P
+Add a qdisc with 8 bandwidth-sharing bands, using the interface MTU as
+their quanta. Since all quanta are the same, this will lead to equal
+distribution of bandwidth between the bands, each will get about 12.5% of
+the link. The low 8 priorities go to individual bands in a reverse 1:1
+fashion (such that the highest priority goes to the first band).
+
+.P
+# tc qdisc add dev eth0 root handle 1: ets bands 8 priomap 7 6 5 4 3 2 1 0
+.br
+# tc qdisc show dev eth0
+.br
+qdisc ets 1: root refcnt 2 bands 8 quanta 1514 1514 1514 1514 1514 1514 1514 1514 priomap 7 6 5 4 3 2 1 0 7 7 7 7 7 7 7 7
+
+.P
+Tweak the first band of the above qdisc to give it a quantum of 2650, which
+will give it about 20% of the link (and about 11.5% to the remaining
+bands):
+
+.P
+# tc class change dev eth0 classid 1:1 ets quantum 2650
+.br
+# tc qdisc show dev eth0
+.br
+qdisc ets 1: root refcnt 2 bands 8 quanta 2650 1514 1514 1514 1514 1514 1514 1514 priomap 7 6 5 4 3 2 1 0 7 7 7 7 7 7 7 7
+
+.P
+Create a purely strict Qdisc with reverse 1:1 mapping between priorities
+and bands:
+
+.P
+# tc qdisc add dev eth0 root handle 1: ets strict 8 priomap 7 6 5 4 3 2 1 0
+.br
+# tc qdisc sh dev eth0
+.br
+qdisc ets 1: root refcnt 2 bands 8 strict 8 priomap 7 6 5 4 3 2 1 0 7 7 7 7 7 7 7 7
+
+.P
+Add a Qdisc with 6 bands, 3 strict and 3 ETS with 35%-30%-25% weights:
+.P
+# tc qdisc add dev eth0 root handle 1: ets strict 3 quanta 3500 3000 2500 priomap 0 1 1 1 2 3 4 5
+.br
+# tc qdisc sh dev eth0
+.br
+qdisc ets 1: root refcnt 2 bands 6 strict 3 quanta 3500 3000 2500 priomap 0 1 1 1 2 3 4 5 5 5 5 5 5 5 5 5
+
+.P
+Create a Qdisc such that traffic with priorities 2, 3 and 4 are strictly
+prioritized over other traffic, and the rest goes into bandwidth-sharing
+classes with equal weights:
+.P
+# tc qdisc add dev eth0 root handle 1: ets bands 8 strict 3 priomap 3 4 0 1 2 5 6 7
+.br
+# tc qdisc sh dev eth0
+.br
+qdisc ets 1: root refcnt 2 bands 8 strict 3 quanta 1514 1514 1514 1514 1514 priomap 3 4 0 1 2 5 6 7 7 7 7 7 7 7 7 7
+
+.SH SEE ALSO
+.BR tc (8),
+.BR tc-prio (8),
+.BR tc-drr (8)
+
+.SH AUTHOR
+Parts of both this manual page and the code itself are taken from PRIO and
+DRR qdiscs.
+.br
+ETS qdisc itself was written by Petr Machata.
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * Enhanced Transmission Selection - 802.1Qaz-based Qdisc
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+static void explain(void)
+{
+ fprintf(stderr, "Usage: ... ets [bands NUMBER] [strict NUMBER] [quanta Q1 Q2...] [priomap P1 P2...]\n");
+}
+
+static void cexplain(void)
+{
+ fprintf(stderr, "Usage: ... ets [quantum Q1]\n");
+}
+
+static unsigned int parse_quantum(const char *arg)
+{
+ unsigned int quantum;
+
+ if (get_unsigned(&quantum, arg, 10)) {
+ fprintf(stderr, "Illegal \"quanta\" element\n");
+ return 0;
+ }
+ if (!quantum)
+ fprintf(stderr, "\"quanta\" must be > 0\n");
+ return quantum;
+}
+
+static int parse_nbands(const char *arg, __u8 *pnbands, const char *what)
+{
+ unsigned int tmp;
+
+ if (get_unsigned(&tmp, arg, 10)) {
+ fprintf(stderr, "Illegal \"%s\"\n", what);
+ return -1;
+ }
+ if (tmp > TCQ_ETS_MAX_BANDS) {
+ fprintf(stderr, "The number of \"%s\" must be <= %d\n",
+ what, TCQ_ETS_MAX_BANDS);
+ return -1;
+ }
+
+ *pnbands = tmp;
+ return 0;
+}
+
+static int ets_parse_opt(struct qdisc_util *qu, int argc, char **argv,
+ struct nlmsghdr *n, const char *dev)
+{
+ __u8 nbands = 0;
+ __u8 nstrict = 0;
+ bool quanta_mode = false;
+ unsigned int nquanta = 0;
+ __u32 quanta[TCQ_ETS_MAX_BANDS];
+ bool priomap_mode = false;
+ unsigned int nprio = 0;
+ __u8 priomap[TC_PRIO_MAX + 1];
+ unsigned int tmp;
+ struct rtattr *tail, *nest;
+
+ while (argc > 0) {
+ if (strcmp(*argv, "bands") == 0) {
+ if (nbands) {
+ fprintf(stderr, "Duplicate \"bands\"\n");
+ return -1;
+ }
+ NEXT_ARG();
+ if (parse_nbands(*argv, &nbands, "bands"))
+ return -1;
+ priomap_mode = quanta_mode = false;
+ } else if (strcmp(*argv, "strict") == 0) {
+ if (nstrict) {
+ fprintf(stderr, "Duplicate \"strict\"\n");
+ return -1;
+ }
+ NEXT_ARG();
+ if (parse_nbands(*argv, &nstrict, "strict"))
+ return -1;
+ priomap_mode = quanta_mode = false;
+ } else if (strcmp(*argv, "quanta") == 0) {
+ if (nquanta) {
+ fprintf(stderr, "Duplicate \"quanta\"\n");
+ return -1;
+ }
+ NEXT_ARG();
+ priomap_mode = false;
+ quanta_mode = true;
+ goto parse_quantum;
+ } else if (strcmp(*argv, "priomap") == 0) {
+ if (nprio) {
+ fprintf(stderr, "Duplicate \"priomap\"\n");
+ return -1;
+ }
+ NEXT_ARG();
+ priomap_mode = true;
+ quanta_mode = false;
+ goto parse_priomap;
+ } else if (strcmp(*argv, "help") == 0) {
+ explain();
+ return -1;
+ } else if (quanta_mode) {
+ unsigned int quantum;
+
+parse_quantum:
+ quantum = parse_quantum(*argv);
+ if (!quantum)
+ return -1;
+ quanta[nquanta++] = quantum;
+ } else if (priomap_mode) {
+ unsigned int band;
+
+parse_priomap:
+ if (get_unsigned(&band, *argv, 10)) {
+ fprintf(stderr, "Illegal \"priomap\" element\n");
+ return -1;
+ }
+ if (nprio > TC_PRIO_MAX) {
+ fprintf(stderr, "\"priomap\" index cannot be higher than %u\n", TC_PRIO_MAX);
+ return -1;
+ }
+ priomap[nprio++] = band;
+ } else {
+ fprintf(stderr, "What is \"%s\"?\n", *argv);
+ explain();
+ return -1;
+ }
+ argc--; argv++;
+ }
+
+ if (!nbands)
+ nbands = nquanta + nstrict;
+ if (!nbands) {
+ fprintf(stderr, "One of \"bands\", \"quanta\" or \"strict\" needs to be specified\n");
+ explain();
+ return -1;
+ }
+ if (nbands < 1) {
+ fprintf(stderr, "The number of \"bands\" must be >= 1\n");
+ explain();
+ return -1;
+ }
+ if (nstrict + nquanta > nbands) {
+ fprintf(stderr, "Not enough total bands to cover all the strict bands and quanta\n");
+ explain();
+ return -1;
+ }
+ for (tmp = 0; tmp < nprio; tmp++) {
+ if (priomap[tmp] >= nbands) {
+ fprintf(stderr, "\"priomap\" element is out of bounds\n");
+ return -1;
+ }
+ }
+
+ tail = addattr_nest(n, 1024, TCA_OPTIONS | NLA_F_NESTED);
+ addattr_l(n, 1024, TCA_ETS_NBANDS, &nbands, sizeof(nbands));
+ if (nstrict)
+ addattr_l(n, 1024, TCA_ETS_NSTRICT, &nstrict, sizeof(nstrict));
+ if (nquanta) {
+ nest = addattr_nest(n, 1024, TCA_ETS_QUANTA | NLA_F_NESTED);
+ for (tmp = 0; tmp < nquanta; tmp++)
+ addattr_l(n, 1024, TCA_ETS_QUANTA_BAND,
+ &quanta[tmp], sizeof(quanta[0]));
+ addattr_nest_end(n, nest);
+ }
+ if (nprio) {
+ nest = addattr_nest(n, 1024, TCA_ETS_PRIOMAP | NLA_F_NESTED);
+ for (tmp = 0; tmp < nprio; tmp++)
+ addattr_l(n, 1024, TCA_ETS_PRIOMAP_BAND,
+ &priomap[tmp], sizeof(priomap[0]));
+ addattr_nest_end(n, nest);
+ }
+ addattr_nest_end(n, tail);
+
+ return 0;
+}
+
+static int ets_parse_copt(struct qdisc_util *qu, int argc, char **argv,
+ struct nlmsghdr *n, const char *dev)
+{
+ unsigned int quantum = 0;
+ struct rtattr *tail;
+
+ while (argc > 0) {
+ if (strcmp(*argv, "quantum") == 0) {
+ if (quantum) {
+ fprintf(stderr, "Duplicate \"quantum\"\n");
+ return -1;
+ }
+ NEXT_ARG();
+ quantum = parse_quantum(*argv);
+ if (!quantum)
+ return -1;
+ } else if (strcmp(*argv, "help") == 0) {
+ cexplain();
+ return -1;
+ } else {
+ fprintf(stderr, "What is \"%s\"?\n", *argv);
+ cexplain();
+ return -1;
+ }
+ argc--; argv++;
+ }
+
+ tail = addattr_nest(n, 1024, TCA_OPTIONS | NLA_F_NESTED);
+ if (quantum)
+ addattr_l(n, 1024, TCA_ETS_QUANTA_BAND, &quantum,
+ sizeof(quantum));
+ addattr_nest_end(n, tail);
+
+ return 0;
+}
+
+static int ets_print_opt_quanta(struct rtattr *opt)
+{
+ int len = RTA_PAYLOAD(opt);
+ unsigned int offset;
+
+ open_json_array(PRINT_ANY, "quanta");
+ for (offset = 0; offset < len; ) {
+ struct rtattr *tb[TCA_ETS_MAX + 1] = {NULL};
+ struct rtattr *attr;
+ __u32 quantum;
+
+ attr = RTA_DATA(opt) + offset;
+ parse_rtattr(tb, TCA_ETS_MAX, attr, len - offset);
+ offset += RTA_LENGTH(RTA_PAYLOAD(attr));
+
+ if (!tb[TCA_ETS_QUANTA_BAND]) {
+ fprintf(stderr, "No ETS band quantum\n");
+ return -1;
+ }
+
+ quantum = rta_getattr_u32(tb[TCA_ETS_QUANTA_BAND]);
+ print_uint(PRINT_ANY, NULL, " %u", quantum);
+
+ }
+ close_json_array(PRINT_ANY, " ");
+
+ return 0;
+}
+
+static int ets_print_opt_priomap(struct rtattr *opt)
+{
+ int len = RTA_PAYLOAD(opt);
+ unsigned int offset;
+
+ open_json_array(PRINT_ANY, "priomap");
+ for (offset = 0; offset < len; ) {
+ struct rtattr *tb[TCA_ETS_MAX + 1] = {NULL};
+ struct rtattr *attr;
+ __u8 band;
+
+ attr = RTA_DATA(opt) + offset;
+ parse_rtattr(tb, TCA_ETS_MAX, attr, len - offset);
+ offset += RTA_LENGTH(RTA_PAYLOAD(attr)) + 3 /* padding */;
+
+ if (!tb[TCA_ETS_PRIOMAP_BAND]) {
+ fprintf(stderr, "No ETS priomap band\n");
+ return -1;
+ }
+
+ band = rta_getattr_u8(tb[TCA_ETS_PRIOMAP_BAND]);
+ print_uint(PRINT_ANY, NULL, " %u", band);
+
+ }
+ close_json_array(PRINT_ANY, " ");
+
+ return 0;
+}
+
+static int ets_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+ struct rtattr *tb[TCA_ETS_MAX + 1];
+ __u8 nbands;
+ __u8 nstrict;
+ int err;
+
+ if (opt == NULL)
+ return 0;
+
+ parse_rtattr_nested(tb, TCA_ETS_MAX, opt);
+
+ if (!tb[TCA_ETS_NBANDS] || !tb[TCA_ETS_PRIOMAP]) {
+ fprintf(stderr, "Incomplete ETS options\n");
+ return -1;
+ }
+
+ nbands = rta_getattr_u8(tb[TCA_ETS_NBANDS]);
+ print_uint(PRINT_ANY, "bands", "bands %u ", nbands);
+
+ if (tb[TCA_ETS_NSTRICT]) {
+ nstrict = rta_getattr_u8(tb[TCA_ETS_NSTRICT]);
+ print_uint(PRINT_ANY, "strict", "strict %u ", nstrict);
+ }
+
+ if (tb[TCA_ETS_QUANTA]) {
+ err = ets_print_opt_quanta(tb[TCA_ETS_QUANTA]);
+ if (err)
+ return err;
+ }
+
+ return ets_print_opt_priomap(tb[TCA_ETS_PRIOMAP]);
+}
+
+static int ets_print_copt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+ struct rtattr *tb[TCA_ETS_MAX + 1];
+ __u32 quantum;
+
+ if (opt == NULL)
+ return 0;
+
+ parse_rtattr_nested(tb, TCA_ETS_MAX, opt);
+
+ if (tb[TCA_ETS_QUANTA_BAND]) {
+ quantum = rta_getattr_u32(tb[TCA_ETS_QUANTA_BAND]);
+ print_uint(PRINT_ANY, "quantum", "quantum %u ", quantum);
+ }
+
+ return 0;
+}
+
+struct qdisc_util ets_qdisc_util = {
+ .id = "ets",
+ .parse_qopt = ets_parse_opt,
+ .parse_copt = ets_parse_copt,
+ .print_qopt = ets_print_opt,
+ .print_copt = ets_print_copt,
+};