2 * Zebra Traffic Control (TC) interaction with the kernel using netlink.
4 * Copyright (C) 2022 Shichu Yang
6 * This file is part of FRR.
8 * FRR is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2, or (at your option) any
13 * FRR is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FRR; see the file COPYING. If not, write to the Free
20 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
28 #include <linux/if_ether.h>
29 #include <sys/socket.h>
35 #include <linux/fib_rules.h>
36 #include <linux/pkt_cls.h>
37 #include <linux/pkt_sched.h>
38 #include "zebra/zserv.h"
39 #include "zebra/zebra_ns.h"
40 #include "zebra/zebra_vrf.h"
42 #include "zebra/interface.h"
43 #include "zebra/debug.h"
44 #include "zebra/rtadv.h"
45 #include "zebra/kernel_netlink.h"
46 #include "zebra/tc_netlink.h"
47 #include "zebra/zebra_errors.h"
48 #include "zebra/zebra_dplane.h"
49 #include "zebra/zebra_trace.h"
51 /* TODO: move these bitflags to zebra_tc.h */
/*
 * Bits tested against the filter bitmap returned by
 * dplane_ctx_tc_get_filter_bm() to decide which flower match keys are
 * added to a filter message.
 */
52 #define TC_FILTER_SRC_IP (1 << 0)
53 #define TC_FILTER_DST_IP (1 << 1)
54 #define TC_FILTER_IP_PROTOCOL (1 << 9)
/* Value tc_get_freq() returns when it ends up with a frequency of 0. */
56 #define TC_FREQ_DEFAULT (100)
/* Added to the interface index to form the major half of a handle. */
58 #define TC_MAJOR_BASE (0x1000u)
/* Used as the defcls field of the HTB global options. */
59 #define TC_MINOR_NOCLASS (0xffffu)
/* NOTE(review): no use of this mask is visible in this part of the file. */
61 #define TC_FILTER_MASK (0x8000u)
/*
 * xmittime(r, s) evaluates TIME_UNITS_PER_SEC * s / r in double precision.
 * The visible caller passes a ratespec rate as r and a cell size as s.
 */
63 #define TIME_UNITS_PER_SEC (1000000)
64 #define xmittime(r, s) (TIME_UNITS_PER_SEC * ((double)(s) / (double)(r)))
/*
 * Return the frequency value that the class encoder divides rate and ceil
 * by when sizing the HTB buffers.
 *
 * Visible steps: open /proc/net/psched, parse two hexadecimal fields into
 * nom and denom (the format string skips the first two fields), and return
 * freq, substituting TC_FREQ_DEFAULT when freq is 0.
 *
 * NOTE(review): how freq is derived from nom and denom, and whether fp is
 * checked for NULL and closed, is not visible in this excerpt - confirm.
 */
66 static uint32_t tc_get_freq(void)
69 FILE *fp
= fopen("/proc/net/psched", "r");
/* Only a line that yields both parsed fields is accepted. */
74 if (fscanf(fp
, "%*08x%*08x%08x%08x", &nom
, &denom
) == 2) {
/* A frequency of 0 falls back to the default. */
81 return freq
== 0 ? TC_FREQ_DEFAULT
: freq
;
/*
 * Build a 32-bit TC handle from its two 16-bit halves.
 *
 * major: value placed in the upper 16 bits of the handle.
 * minor: value placed in the lower 16 bits of the handle.
 *
 * Returns (major << 16) | minor as an unsigned 32-bit value.
 *
 * major is widened to uint32_t before the shift. Without the cast, integer
 * promotion turns it into a signed int, and shifting any value >= 0x8000
 * left by 16 overflows int, which is undefined behavior.
 */
static inline uint32_t tc_make_handle(uint16_t major, uint16_t minor)
{
	return ((uint32_t)major << 16) | (uint32_t)minor;
}
/*
 * Build the TC handle used for the interface of a dataplane context.
 *
 * The major half is TC_MAJOR_BASE plus the interface index truncated to
 * 16 bits; the result is combined with minor through tc_make_handle().
 * The sum is stored in a uint16_t, so it wraps for large interface indexes.
 *
 * NOTE(review): the declaration of the minor parameter is not visible in
 * this excerpt; visible callers pass 0 or 1.
 */
89 static inline uint32_t tc_get_handle(struct zebra_dplane_ctx
*ctx
,
92 uint16_t major
= TC_MAJOR_BASE
+ (uint16_t)dplane_ctx_get_ifindex(ctx
);
94 return tc_make_handle(major
, minor
);
/*
 * Fill a 256-entry rate table for ratespec and record the cell parameters
 * in the ratespec itself.
 *
 * Entry i is xmittime(ratespec->rate, (i + 1) << cell_log), that is the
 * value computed for a size of (i + 1) cells of 2^cell_log units.
 *
 * NOTE(review): the third parameter and the declaration of cell_log are not
 * visible in this excerpt; visible callers pass an mtu as third argument.
 */
97 static void tc_calc_rate_table(struct tc_ratespec
*ratespec
, uint32_t *table
,
/*
 * Loop while mtu shifted right by cell_log still exceeds 255. The loop body
 * is not visible here; presumably it increments cell_log - confirm.
 */
107 while ((mtu
>> cell_log
) > 255)
/* One table slot per cell count from 1 to 256. */
111 for (int i
= 0; i
< 256; i
++)
112 table
[i
] = xmittime(ratespec
->rate
, (i
+ 1) << cell_log
);
/* Record the cell parameters that were used to build the table. */
114 ratespec
->cell_align
= -1;
115 ratespec
->cell_log
= cell_log
;
116 ratespec
->linklayer
= TC_LINKLAYER_ETHERNET
;
/*
 * Copy the address of a struct prefix into a struct inet_prefix.
 *
 * For AF_INET and AF_INET6 the visible code copies the family, sets bitlen
 * to the prefix length, ORs PREFIXLEN_SPECIFIED and ADDRTYPE_INET into
 * flags and copies the address bytes into addr->data.
 *
 * Callers in this file treat a nonzero return value as failure.
 *
 * NOTE(review): the assignments to bytelen, any reset of flags, the
 * handling of other address families and the return statements are not
 * visible in this excerpt.
 */
119 static int tc_flower_get_inet_prefix(const struct prefix
*prefix
,
120 struct inet_prefix
*addr
)
122 addr
->family
= prefix
->family
;
124 if (addr
->family
== AF_INET
) {
126 addr
->bitlen
= prefix
->prefixlen
;
128 addr
->flags
|= PREFIXLEN_SPECIFIED
;
129 addr
->flags
|= ADDRTYPE_INET
;
/* The copy length is the size of the val32 member of the prefix union. */
130 memcpy(addr
->data
, prefix
->u
.val32
, sizeof(prefix
->u
.val32
));
/* The IPv6 branch sets the same two flags as the IPv4 branch. */
131 } else if (addr
->family
== AF_INET6
) {
133 addr
->bitlen
= prefix
->prefixlen
;
135 addr
->flags
|= PREFIXLEN_SPECIFIED
;
136 addr
->flags
|= ADDRTYPE_INET
;
/* The copy length is the size of the val member of the prefix union. */
137 memcpy(addr
->data
, prefix
->u
.val
, sizeof(prefix
->u
.val
));
/*
 * Build the network mask that corresponds to the length of a struct prefix
 * and store it in a struct inet_prefix.
 *
 * The family, bitlen and flags are filled in the same way as in
 * tc_flower_get_inet_prefix(). addr->data is then set to all-ones over
 * bytelen bytes and trimmed word by word according to the prefix length.
 *
 * Callers in this file treat a nonzero return value as failure.
 *
 * NOTE(review): the assignments to bytelen, the handling of other address
 * families, parts of the trimming loop and the return statements are not
 * visible in this excerpt.
 */
145 static int tc_flower_get_inet_mask(const struct prefix
*prefix
,
146 struct inet_prefix
*addr
)
148 addr
->family
= prefix
->family
;
150 if (addr
->family
== AF_INET
) {
152 addr
->bitlen
= prefix
->prefixlen
;
154 addr
->flags
|= PREFIXLEN_SPECIFIED
;
155 addr
->flags
|= ADDRTYPE_INET
;
156 } else if (addr
->family
== AF_INET6
) {
158 addr
->bitlen
= prefix
->prefixlen
;
160 addr
->flags
|= PREFIXLEN_SPECIFIED
;
161 addr
->flags
|= ADDRTYPE_INET
;
/* Start from an all-ones mask; the loop below trims it. */
166 memset(addr
->data
, 0xff, addr
->bytelen
);
/* rest holds the prefix length at loop entry. */
168 int rest
= prefix
->prefixlen
;
/* Walk addr->data in units of four bytes. */
170 for (int i
= 0; i
< addr
->bytelen
/ 4; i
++) {
/* At least 32 prefix bits remain, so this word is fully covered. */
173 } else if (rest
/ 32 >= 1) {
/*
 * Partial word: shift left by 32 - rest, then convert to network byte
 * order. Assuming data holds 32-bit words, this keeps the top rest
 * bits of the all-ones word set.
 */
176 addr
->data
[i
] <<= 32 - rest
;
177 addr
->data
[i
] = htonl(addr
->data
[i
]);
186 * Traffic control queue discipline encoding (only "htb" supported)
/*
 * Encode a netlink qdisc message of type cmd into the buffer at data.
 *
 * The message targets the interface of ctx, uses the handle
 * tc_get_handle(ctx, 0) with TC_H_ROOT as parent, sets TCA_KIND to htb and
 * carries the global HTB options as TCA_HTB_INIT inside a TCA_OPTIONS nest.
 *
 * Returns the aligned length of the encoded message.
 *
 * NOTE(review): the local declarations, the layout of the request struct
 * and the action taken when datalen is too small are not visible in this
 * excerpt.
 */
188 static ssize_t
netlink_qdisc_msg_encode(int cmd
, struct zebra_dplane_ctx
*ctx
,
189 void *data
, size_t datalen
)
193 const char *kind
= "htb";
/* Global HTB options; defcls is set to TC_MINOR_NOCLASS. */
195 struct tc_htb_glob htb_glob
= {
196 .rate2quantum
= 10, .version
= 3, .defcls
= TC_MINOR_NOCLASS
};
/* The request is built in place in the caller supplied buffer. */
204 } *req
= (void *)data
;
/* The buffer must at least hold the fixed part of the request. */
206 if (datalen
< sizeof(*req
))
209 nl
= kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx
));
211 memset(req
, 0, sizeof(*req
));
213 req
->n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct tcmsg
));
214 req
->n
.nlmsg_flags
= NLM_F_CREATE
| NLM_F_REQUEST
;
/* NLM_F_REPLACE is added on top of create and request. */
216 req
->n
.nlmsg_flags
|= NLM_F_REPLACE
;
218 req
->n
.nlmsg_type
= cmd
;
220 req
->n
.nlmsg_pid
= nl
->snl
.nl_pid
;
222 req
->t
.tcm_family
= AF_UNSPEC
;
223 req
->t
.tcm_ifindex
= dplane_ctx_get_ifindex(ctx
);
/* Minor 0 is used for the qdisc handle; its parent is TC_H_ROOT. */
224 req
->t
.tcm_handle
= tc_get_handle(ctx
, 0);
225 req
->t
.tcm_parent
= TC_H_ROOT
;
/* The kind string is sent including its terminating NUL byte. */
227 nl_attr_put(&req
->n
, datalen
, TCA_KIND
, kind
, strlen(kind
) + 1);
229 nest
= nl_attr_nest(&req
->n
, datalen
, TCA_OPTIONS
);
231 nl_attr_put(&req
->n
, datalen
, TCA_HTB_INIT
, &htb_glob
,
233 nl_attr_nest_end(&req
->n
, nest
);
235 return NLMSG_ALIGN(req
->n
.nlmsg_len
);
239 * Traffic control class encoding
/*
 * Encode a netlink traffic class message of type cmd into the buffer at
 * data, using HTB options built from the rate and ceil stored in ctx.
 *
 * The class uses the handle tc_get_handle(ctx, 1) and is attached to the
 * parent tc_get_handle(ctx, 0). The TCA_OPTIONS nest carries the HTB
 * parameters plus the rate and ceil tables, and 64-bit attributes when a
 * value does not fit in 32 bits.
 *
 * Returns the aligned length of the encoded message.
 *
 * NOTE(review): the local declarations, the layout of the request struct,
 * the action taken when datalen is too small and the size arguments of the
 * 64-bit attributes are not visible in this excerpt.
 */
241 static ssize_t
netlink_tclass_msg_encode(int cmd
, struct zebra_dplane_ctx
*ctx
,
242 void *data
, size_t datalen
)
245 struct tc_htb_opt htb_opt
= {};
248 uint64_t buffer
, cbuffer
;
250 /* TODO: fetch mtu from interface */
/* The request is built in place in the caller supplied buffer. */
262 } *req
= (void *)data
;
/* The buffer must at least hold the fixed part of the request. */
264 if (datalen
< sizeof(*req
))
267 nl
= kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx
));
269 memset(req
, 0, sizeof(*req
));
271 req
->n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct tcmsg
));
272 req
->n
.nlmsg_flags
= NLM_F_CREATE
| NLM_F_REQUEST
;
274 req
->n
.nlmsg_type
= cmd
;
276 req
->n
.nlmsg_pid
= nl
->snl
.nl_pid
;
278 req
->t
.tcm_family
= AF_UNSPEC
;
279 req
->t
.tcm_ifindex
= dplane_ctx_get_ifindex(ctx
);
/* Minor 1 is the class handle; the parent is the handle with minor 0. */
280 req
->t
.tcm_handle
= tc_get_handle(ctx
, 1);
281 req
->t
.tcm_parent
= tc_get_handle(ctx
, 0);
283 rate
= dplane_ctx_tc_get_rate(ctx
);
284 ceil
= dplane_ctx_tc_get_ceil(ctx
);
/* ceil is raised to rate when it is lower. */
286 ceil
= ceil
< rate
? rate
: ceil
;
/* The 32-bit fields are set to all-ones when the value needs more bits. */
288 htb_opt
.rate
.rate
= (rate
>> 32 != 0) ? ~0U : rate
;
289 htb_opt
.ceil
.rate
= (ceil
>> 32 != 0) ? ~0U : ceil
;
/* Buffer sizes are the rate and ceil divided by the value of tc_get_freq(). */
291 buffer
= rate
/ tc_get_freq(), cbuffer
= ceil
/ tc_get_freq();
293 htb_opt
.buffer
= buffer
;
294 htb_opt
.cbuffer
= cbuffer
;
/* Build one table for the rate and one for the ceil. */
296 tc_calc_rate_table(&htb_opt
.rate
, rtab
, mtu
);
297 tc_calc_rate_table(&htb_opt
.ceil
, ctab
, mtu
);
299 htb_opt
.ceil
.mpu
= htb_opt
.rate
.mpu
= 0;
300 htb_opt
.ceil
.overhead
= htb_opt
.rate
.overhead
= 0;
302 nest
= nl_attr_nest(&req
->n
, datalen
, TCA_OPTIONS
);
/*
 * NOTE(review): this branch handles a rate that needs more than 32 bits
 * but emits it under TCA_HTB_CEIL64, the same attribute the ceil branch
 * below uses. TCA_HTB_RATE64 looks like the intended attribute - confirm
 * against linux/pkt_sched.h and fix.
 */
304 if (rate
>> 32 != 0) {
305 nl_attr_put(&req
->n
, datalen
, TCA_HTB_CEIL64
, &rate
,
/* A ceil that needs more than 32 bits is sent as a 64-bit attribute. */
309 if (ceil
>> 32 != 0) {
310 nl_attr_put(&req
->n
, datalen
, TCA_HTB_CEIL64
, &ceil
,
314 nl_attr_put(&req
->n
, datalen
, TCA_HTB_PARMS
, &htb_opt
, sizeof(htb_opt
));
316 nl_attr_put(&req
->n
, datalen
, TCA_HTB_RTAB
, rtab
, sizeof(rtab
));
317 nl_attr_put(&req
->n
, datalen
, TCA_HTB_CTAB
, ctab
, sizeof(ctab
));
318 nl_attr_nest_end(&req
->n
, nest
);
320 return NLMSG_ALIGN(req
->n
.nlmsg_len
);
324 * Traffic control filter encoding (only "flower" supported)
/*
 * Encode a netlink traffic filter message of type cmd into the buffer at
 * data. The filter kind is flower.
 *
 * The filter is attached to the parent tc_get_handle(ctx, 0) and directs
 * matches to the class id tc_get_handle(ctx, 1). Which match keys are
 * emitted depends on the filter bitmap of ctx: source prefix, destination
 * prefix and IP protocol are each optional. Flags and the ethernet type are
 * always emitted.
 *
 * Returns the aligned length of the encoded message.
 *
 * NOTE(review): the declarations of priority, flags and the other locals,
 * the layout of the request struct and the actions taken when datalen is
 * too small or when an address conversion fails are not visible in this
 * excerpt.
 */
326 static ssize_t
netlink_tfilter_msg_encode(int cmd
, struct zebra_dplane_ctx
*ctx
,
327 void *data
, size_t datalen
)
332 const char *kind
= "flower";
/* Scratch structure reused for every address and mask conversion below. */
340 struct inet_prefix addr
;
/* The request is built in place in the caller supplied buffer. */
346 } *req
= (void *)data
;
/* The buffer must at least hold the fixed part of the request. */
348 if (datalen
< sizeof(*req
))
351 nl
= kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx
));
353 memset(req
, 0, sizeof(*req
));
355 req
->n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct tcmsg
));
356 req
->n
.nlmsg_flags
= NLM_F_CREATE
| NLM_F_REQUEST
;
/* NLM_F_EXCL is added on top of create and request. */
358 req
->n
.nlmsg_flags
|= NLM_F_EXCL
;
360 req
->n
.nlmsg_type
= cmd
;
362 req
->n
.nlmsg_pid
= nl
->snl
.nl_pid
;
364 req
->t
.tcm_family
= AF_UNSPEC
;
365 req
->t
.tcm_ifindex
= dplane_ctx_get_ifindex(ctx
);
367 /* TODO: priority and layer-3 protocol support */
/* The protocol is fixed to IPv4, stored in network byte order. */
369 protocol
= htons(ETH_P_IP
);
370 classid
= tc_get_handle(ctx
, 1);
371 filter_bm
= dplane_ctx_tc_get_filter_bm(ctx
);
/* tcm_info packs priority in the upper and protocol in the lower 16 bits. */
373 req
->t
.tcm_info
= tc_make_handle(priority
, protocol
);
375 req
->t
.tcm_handle
= 1;
376 req
->t
.tcm_parent
= tc_get_handle(ctx
, 0);
/* The kind string is sent including its terminating NUL byte. */
378 nl_attr_put(&req
->n
, datalen
, TCA_KIND
, kind
, strlen(kind
) + 1);
379 nest
= nl_attr_nest(&req
->n
, datalen
, TCA_OPTIONS
);
381 nl_attr_put(&req
->n
, datalen
, TCA_FLOWER_CLASSID
, &classid
,
/* Source match: the address attribute first, then its mask attribute. */
384 if (filter_bm
& TC_FILTER_SRC_IP
) {
385 const struct prefix
*src_p
= dplane_ctx_tc_get_src_ip(ctx
);
387 if (tc_flower_get_inet_prefix(src_p
, &addr
) != 0)
/* The attribute type follows the address family of the prefix. */
390 nl_attr_put(&req
->n
, datalen
,
391 (addr
.family
== AF_INET
) ? TCA_FLOWER_KEY_IPV4_SRC
392 : TCA_FLOWER_KEY_IPV6_SRC
,
393 addr
.data
, addr
.bytelen
);
395 if (tc_flower_get_inet_mask(src_p
, &addr
) != 0)
398 nl_attr_put(&req
->n
, datalen
,
399 (addr
.family
== AF_INET
)
400 ? TCA_FLOWER_KEY_IPV4_SRC_MASK
401 : TCA_FLOWER_KEY_IPV6_SRC_MASK
,
402 addr
.data
, addr
.bytelen
);
/* Destination match: same sequence as for the source match. */
405 if (filter_bm
& TC_FILTER_DST_IP
) {
406 const struct prefix
*dst_p
= dplane_ctx_tc_get_dst_ip(ctx
);
408 if (tc_flower_get_inet_prefix(dst_p
, &addr
) != 0)
411 nl_attr_put(&req
->n
, datalen
,
412 (addr
.family
== AF_INET
) ? TCA_FLOWER_KEY_IPV4_DST
413 : TCA_FLOWER_KEY_IPV6_DST
,
414 addr
.data
, addr
.bytelen
);
416 if (tc_flower_get_inet_mask(dst_p
, &addr
) != 0)
419 nl_attr_put(&req
->n
, datalen
,
420 (addr
.family
== AF_INET
)
421 ? TCA_FLOWER_KEY_IPV4_DST_MASK
422 : TCA_FLOWER_KEY_IPV6_DST_MASK
,
423 addr
.data
, addr
.bytelen
);
/* Optional match on the IP protocol number, sent as one byte. */
426 if (filter_bm
& TC_FILTER_IP_PROTOCOL
) {
427 nl_attr_put8(&req
->n
, datalen
, TCA_FLOWER_KEY_IP_PROTO
,
428 dplane_ctx_tc_get_ip_proto(ctx
));
/* Flags and ethernet type are emitted regardless of the bitmap. */
431 nl_attr_put32(&req
->n
, datalen
, TCA_FLOWER_FLAGS
, flags
);
433 nl_attr_put16(&req
->n
, datalen
, TCA_FLOWER_KEY_ETH_TYPE
, protocol
);
434 nl_attr_nest_end(&req
->n
, nest
);
436 return NLMSG_ALIGN(req
->n
.nlmsg_len
);
439 static ssize_t
netlink_newqdisc_msg_encoder(struct zebra_dplane_ctx
*ctx
,
440 void *buf
, size_t buflen
)
442 return netlink_qdisc_msg_encode(RTM_NEWQDISC
, ctx
, buf
, buflen
);
445 static ssize_t
netlink_newtclass_msg_encoder(struct zebra_dplane_ctx
*ctx
,
446 void *buf
, size_t buflen
)
448 return netlink_tclass_msg_encode(RTM_NEWTCLASS
, ctx
, buf
, buflen
);
451 static ssize_t
netlink_newtfilter_msg_encoder(struct zebra_dplane_ctx
*ctx
,
452 void *buf
, size_t buflen
)
454 return netlink_tfilter_msg_encode(RTM_NEWTFILTER
, ctx
, buf
, buflen
);
/*
 * Queue the netlink messages for a traffic control update on the batch bth.
 *
 * Three messages are added for ctx, in this order: new qdisc, new traffic
 * class, new traffic filter. The status of the first two additions is
 * discarded; only the status of the filter addition is returned.
 *
 * NOTE(review): the last argument of the final call is not visible in this
 * excerpt; the two earlier calls pass false in that position.
 */
457 enum netlink_msg_status
netlink_put_tc_update_msg(struct nl_batch
*bth
,
458 struct zebra_dplane_ctx
*ctx
)
460 /* TODO: error handling and other actions (delete, replace, ...) */
/* Return values of these two calls are ignored. */
462 netlink_batch_add_msg(bth
, ctx
, netlink_newqdisc_msg_encoder
, false);
463 netlink_batch_add_msg(bth
, ctx
, netlink_newtclass_msg_encoder
, false);
464 return netlink_batch_add_msg(bth
, ctx
, netlink_newtfilter_msg_encoder
,
468 #endif /* HAVE_NETLINK */