]> git.proxmox.com Git - mirror_frr.git/blame - zebra/tc_netlink.c
zebra: update tc netlink / socket license header
[mirror_frr.git] / zebra / tc_netlink.c
CommitLineData
/*
 * Zebra Traffic Control (TC) interaction with the kernel using netlink.
 *
 * Copyright (C) 2022 Shichu Yang
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
21#include <zebra.h>
22
23#ifdef HAVE_NETLINK
24
83f496bd 25#include <netinet/if_ether.h>
449a30ed
SY
26#include <sys/socket.h>
27
28#include "if.h"
29#include "prefix.h"
30#include "vrf.h"
31
32#include <linux/fib_rules.h>
33#include <linux/pkt_cls.h>
34#include <linux/pkt_sched.h>
35#include "zebra/zserv.h"
36#include "zebra/zebra_ns.h"
37#include "zebra/zebra_vrf.h"
38#include "zebra/rt.h"
39#include "zebra/interface.h"
40#include "zebra/debug.h"
41#include "zebra/rtadv.h"
42#include "zebra/kernel_netlink.h"
43#include "zebra/tc_netlink.h"
44#include "zebra/zebra_errors.h"
45#include "zebra/zebra_dplane.h"
46#include "zebra/zebra_trace.h"
47
/* TODO: move these bitflags to zebra_tc.h */

/* Bits tested against the filter bitmap to pick which flower keys to emit. */
#define TC_FILTER_SRC_IP (1 << 0)
#define TC_FILTER_DST_IP (1 << 1)
#define TC_FILTER_IP_PROTOCOL (1 << 9)

/* Value tc_get_freq() falls back to when no frequency could be read. */
#define TC_FREQ_DEFAULT (100)

/* Added to the (truncated) ifindex to form the major part of a handle. */
#define TC_MAJOR_BASE (0x1000u)
/* Used as the HTB default class (defcls) when creating the qdisc. */
#define TC_MINOR_NOCLASS (0xffffu)

#define TC_FILTER_MASK (0x8000u)

/*
 * xmittime(r, s): time, in units of 1/TIME_UNITS_PER_SEC seconds, needed to
 * send `s` bytes at a rate of `r` bytes per second. Evaluates to a double.
 */
#define TIME_UNITS_PER_SEC (1000000)
#define xmittime(r, s) (TIME_UNITS_PER_SEC * ((double)(s) / (double)(r)))
62
63static uint32_t tc_get_freq(void)
64{
65 int freq = 0;
66 FILE *fp = fopen("/proc/net/psched", "r");
67
68 if (fp) {
69 uint32_t nom, denom;
70
71 if (fscanf(fp, "%*08x%*08x%08x%08x", &nom, &denom) == 2) {
72 if (nom == 1000000)
73 freq = denom;
74 }
75 fclose(fp);
76 }
77
78 return freq == 0 ? TC_FREQ_DEFAULT : freq;
79}
80
/*
 * Combine a 16-bit major and a 16-bit minor number into a 32-bit handle:
 * major occupies the upper half, minor the lower half.
 */
static inline uint32_t tc_make_handle(uint16_t major, uint16_t minor)
{
	/*
	 * Widen before shifting: a uint16_t promotes to (signed) int, and
	 * shifting a value >= 0x8000 left by 16 would overflow it.
	 */
	return ((uint32_t)major << 16) | (uint32_t)minor;
}
85
86static inline uint32_t tc_get_handle(struct zebra_dplane_ctx *ctx,
87 uint16_t minor)
88{
89 uint16_t major = TC_MAJOR_BASE + (uint16_t)dplane_ctx_get_ifindex(ctx);
90
91 return tc_make_handle(major, minor);
92}
93
94static void tc_calc_rate_table(struct tc_ratespec *ratespec, uint32_t *table,
95 uint32_t mtu)
96{
97 if (mtu == 0)
98 mtu = 2047;
99
100 int cell_log = -1;
101
102 if (cell_log < 0) {
103 cell_log = 0;
104 while ((mtu >> cell_log) > 255)
105 cell_log++;
106 }
107
108 for (int i = 0; i < 256; i++)
109 table[i] = xmittime(ratespec->rate, (i + 1) << cell_log);
110
111 ratespec->cell_align = -1;
112 ratespec->cell_log = cell_log;
113 ratespec->linklayer = TC_LINKLAYER_ETHERNET;
114}
115
116static int tc_flower_get_inet_prefix(const struct prefix *prefix,
117 struct inet_prefix *addr)
118{
119 addr->family = prefix->family;
120
121 if (addr->family == AF_INET) {
122 addr->bytelen = 4;
123 addr->bitlen = prefix->prefixlen;
124 addr->flags = 0;
125 addr->flags |= PREFIXLEN_SPECIFIED;
126 addr->flags |= ADDRTYPE_INET;
127 memcpy(addr->data, prefix->u.val32, sizeof(prefix->u.val32));
128 } else if (addr->family == AF_INET6) {
129 addr->bytelen = 16;
130 addr->bitlen = prefix->prefixlen;
131 addr->flags = 0;
132 addr->flags |= PREFIXLEN_SPECIFIED;
133 addr->flags |= ADDRTYPE_INET;
134 memcpy(addr->data, prefix->u.val, sizeof(prefix->u.val));
135 } else {
136 return -1;
137 }
138
139 return 0;
140}
141
142static int tc_flower_get_inet_mask(const struct prefix *prefix,
143 struct inet_prefix *addr)
144{
145 addr->family = prefix->family;
146
147 if (addr->family == AF_INET) {
148 addr->bytelen = 4;
149 addr->bitlen = prefix->prefixlen;
150 addr->flags = 0;
151 addr->flags |= PREFIXLEN_SPECIFIED;
152 addr->flags |= ADDRTYPE_INET;
153 } else if (addr->family == AF_INET6) {
154 addr->bytelen = 16;
155 addr->bitlen = prefix->prefixlen;
156 addr->flags = 0;
157 addr->flags |= PREFIXLEN_SPECIFIED;
158 addr->flags |= ADDRTYPE_INET;
159 } else {
160 return -1;
161 }
162
163 memset(addr->data, 0xff, addr->bytelen);
164
165 int rest = prefix->prefixlen;
166
167 for (int i = 0; i < addr->bytelen / 4; i++) {
168 if (!rest) {
169 addr->data[i] = 0;
170 } else if (rest / 32 >= 1) {
171 rest -= 32;
172 } else {
173 addr->data[i] <<= 32 - rest;
174 addr->data[i] = htonl(addr->data[i]);
175 rest = 0;
176 }
177 }
178
179 return 0;
180}
181
182/*
183 * Traffic control queue discipline encoding (only "htb" supported)
184 */
185static ssize_t netlink_qdisc_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
186 void *data, size_t datalen)
187{
188 struct nlsock *nl;
189
190 const char *kind = "htb";
191
192 struct tc_htb_glob htb_glob = {
193 .rate2quantum = 10, .version = 3, .defcls = TC_MINOR_NOCLASS};
194
195 struct rtattr *nest;
196
197 struct {
198 struct nlmsghdr n;
199 struct tcmsg t;
200 char buf[0];
201 } *req = (void *)data;
202
203 if (datalen < sizeof(*req))
204 return 0;
205
206 nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
207
208 memset(req, 0, sizeof(*req));
209
210 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
211 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
212
213 req->n.nlmsg_flags |= NLM_F_REPLACE;
214
215 req->n.nlmsg_type = cmd;
216
217 req->n.nlmsg_pid = nl->snl.nl_pid;
218
219 req->t.tcm_family = AF_UNSPEC;
220 req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
221 req->t.tcm_handle = tc_get_handle(ctx, 0);
222 req->t.tcm_parent = TC_H_ROOT;
223
224 nl_attr_put(&req->n, datalen, TCA_KIND, kind, strlen(kind) + 1);
225
226 nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
227
228 nl_attr_put(&req->n, datalen, TCA_HTB_INIT, &htb_glob,
229 sizeof(htb_glob));
230 nl_attr_nest_end(&req->n, nest);
231
232 return NLMSG_ALIGN(req->n.nlmsg_len);
233}
234
235/*
236 * Traffic control class encoding
237 */
238static ssize_t netlink_tclass_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
239 void *data, size_t datalen)
240{
241 struct nlsock *nl;
242 struct tc_htb_opt htb_opt = {};
243
244 uint64_t rate, ceil;
245 uint64_t buffer, cbuffer;
246
247 /* TODO: fetch mtu from interface */
248 uint32_t mtu = 0;
249
250 uint32_t rtab[256];
251 uint32_t ctab[256];
252
253 struct rtattr *nest;
254
255 struct {
256 struct nlmsghdr n;
257 struct tcmsg t;
258 char buf[0];
259 } *req = (void *)data;
260
261 if (datalen < sizeof(*req))
262 return 0;
263
264 nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
265
266 memset(req, 0, sizeof(*req));
267
268 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
269 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
270
271 req->n.nlmsg_type = cmd;
272
273 req->n.nlmsg_pid = nl->snl.nl_pid;
274
275 req->t.tcm_family = AF_UNSPEC;
276 req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
277 req->t.tcm_handle = tc_get_handle(ctx, 1);
278 req->t.tcm_parent = tc_get_handle(ctx, 0);
279
280 rate = dplane_ctx_tc_get_rate(ctx);
281 ceil = dplane_ctx_tc_get_ceil(ctx);
282
283 ceil = ceil < rate ? rate : ceil;
284
285 htb_opt.rate.rate = (rate >> 32 != 0) ? ~0U : rate;
286 htb_opt.ceil.rate = (ceil >> 32 != 0) ? ~0U : ceil;
287
288 buffer = rate / tc_get_freq(), cbuffer = ceil / tc_get_freq();
289
290 htb_opt.buffer = buffer;
291 htb_opt.cbuffer = cbuffer;
292
293 tc_calc_rate_table(&htb_opt.rate, rtab, mtu);
4c9b85ac 294 tc_calc_rate_table(&htb_opt.ceil, ctab, mtu);
449a30ed
SY
295
296 htb_opt.ceil.mpu = htb_opt.rate.mpu = 0;
297 htb_opt.ceil.overhead = htb_opt.rate.overhead = 0;
298
299 nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
300
301 if (rate >> 32 != 0) {
302 nl_attr_put(&req->n, datalen, TCA_HTB_CEIL64, &rate,
303 sizeof(rate));
304 }
305
306 if (ceil >> 32 != 0) {
307 nl_attr_put(&req->n, datalen, TCA_HTB_CEIL64, &ceil,
308 sizeof(ceil));
309 }
310
311 nl_attr_put(&req->n, datalen, TCA_HTB_PARMS, &htb_opt, sizeof(htb_opt));
312
313 nl_attr_put(&req->n, datalen, TCA_HTB_RTAB, rtab, sizeof(rtab));
314 nl_attr_put(&req->n, datalen, TCA_HTB_CTAB, ctab, sizeof(ctab));
315 nl_attr_nest_end(&req->n, nest);
316
317 return NLMSG_ALIGN(req->n.nlmsg_len);
318}
319
320/*
321 * Traffic control filter encoding (only "flower" supported)
322 */
323static ssize_t netlink_tfilter_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
324 void *data, size_t datalen)
325{
326 struct nlsock *nl;
327 struct rtattr *nest;
328
329 const char *kind = "flower";
330
331 uint16_t priority;
332 uint16_t protocol;
333 uint32_t classid;
334 uint32_t filter_bm;
335 uint32_t flags = 0;
336
337 struct inet_prefix addr;
338
339 struct {
340 struct nlmsghdr n;
341 struct tcmsg t;
342 char buf[0];
343 } *req = (void *)data;
344
345 if (datalen < sizeof(*req))
346 return 0;
347
348 nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
349
350 memset(req, 0, sizeof(*req));
351
352 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
353 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
354
355 req->n.nlmsg_flags |= NLM_F_EXCL;
356
357 req->n.nlmsg_type = cmd;
358
359 req->n.nlmsg_pid = nl->snl.nl_pid;
360
361 req->t.tcm_family = AF_UNSPEC;
362 req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
363
364 /* TODO: priority and layer-3 protocol support */
365 priority = 0;
366 protocol = htons(ETH_P_IP);
367 classid = tc_get_handle(ctx, 1);
368 filter_bm = dplane_ctx_tc_get_filter_bm(ctx);
369
370 req->t.tcm_info = tc_make_handle(priority, protocol);
371
372 req->t.tcm_handle = 1;
373 req->t.tcm_parent = tc_get_handle(ctx, 0);
374
375 nl_attr_put(&req->n, datalen, TCA_KIND, kind, strlen(kind) + 1);
376 nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
377
378 nl_attr_put(&req->n, datalen, TCA_FLOWER_CLASSID, &classid,
379 sizeof(classid));
380
381 if (filter_bm & TC_FILTER_SRC_IP) {
382 const struct prefix *src_p = dplane_ctx_tc_get_src_ip(ctx);
383
384 if (tc_flower_get_inet_prefix(src_p, &addr) != 0)
385 return 0;
386
387 nl_attr_put(&req->n, datalen,
388 (addr.family == AF_INET) ? TCA_FLOWER_KEY_IPV4_SRC
389 : TCA_FLOWER_KEY_IPV6_SRC,
390 addr.data, addr.bytelen);
391
392 if (tc_flower_get_inet_mask(src_p, &addr) != 0)
393 return 0;
394
395 nl_attr_put(&req->n, datalen,
396 (addr.family == AF_INET)
397 ? TCA_FLOWER_KEY_IPV4_SRC_MASK
398 : TCA_FLOWER_KEY_IPV6_SRC_MASK,
399 addr.data, addr.bytelen);
400 }
401
402 if (filter_bm & TC_FILTER_DST_IP) {
403 const struct prefix *dst_p = dplane_ctx_tc_get_dst_ip(ctx);
404
405 if (tc_flower_get_inet_prefix(dst_p, &addr) != 0)
406 return 0;
407
408 nl_attr_put(&req->n, datalen,
409 (addr.family == AF_INET) ? TCA_FLOWER_KEY_IPV4_DST
410 : TCA_FLOWER_KEY_IPV6_DST,
411 addr.data, addr.bytelen);
412
413 if (tc_flower_get_inet_mask(dst_p, &addr) != 0)
414 return 0;
415
416 nl_attr_put(&req->n, datalen,
417 (addr.family == AF_INET)
418 ? TCA_FLOWER_KEY_IPV4_DST_MASK
419 : TCA_FLOWER_KEY_IPV6_DST_MASK,
420 addr.data, addr.bytelen);
421 }
422
423 if (filter_bm & TC_FILTER_IP_PROTOCOL) {
424 nl_attr_put8(&req->n, datalen, TCA_FLOWER_KEY_IP_PROTO,
425 dplane_ctx_tc_get_ip_proto(ctx));
426 }
427
428 nl_attr_put32(&req->n, datalen, TCA_FLOWER_FLAGS, flags);
429
430 nl_attr_put16(&req->n, datalen, TCA_FLOWER_KEY_ETH_TYPE, protocol);
431 nl_attr_nest_end(&req->n, nest);
432
433 return NLMSG_ALIGN(req->n.nlmsg_len);
434}
435
436static ssize_t netlink_newqdisc_msg_encoder(struct zebra_dplane_ctx *ctx,
437 void *buf, size_t buflen)
438{
439 return netlink_qdisc_msg_encode(RTM_NEWQDISC, ctx, buf, buflen);
440}
441
442static ssize_t netlink_newtclass_msg_encoder(struct zebra_dplane_ctx *ctx,
443 void *buf, size_t buflen)
444{
445 return netlink_tclass_msg_encode(RTM_NEWTCLASS, ctx, buf, buflen);
446}
447
448static ssize_t netlink_newtfilter_msg_encoder(struct zebra_dplane_ctx *ctx,
449 void *buf, size_t buflen)
450{
451 return netlink_tfilter_msg_encode(RTM_NEWTFILTER, ctx, buf, buflen);
452}
453
454enum netlink_msg_status netlink_put_tc_update_msg(struct nl_batch *bth,
455 struct zebra_dplane_ctx *ctx)
456{
457 /* TODO: error handling and other actions (delete, replace, ...) */
458
459 netlink_batch_add_msg(bth, ctx, netlink_newqdisc_msg_encoder, false);
460 netlink_batch_add_msg(bth, ctx, netlink_newtclass_msg_encoder, false);
461 return netlink_batch_add_msg(bth, ctx, netlink_newtfilter_msg_encoder,
462 false);
463}
464
465#endif /* HAVE_NETLINK */