]> git.proxmox.com Git - mirror_frr.git/blob - zebra/tc_netlink.c
Merge pull request #11824 from sigeryang/master
[mirror_frr.git] / zebra / tc_netlink.c
1 /*
2 * Zebra Traffic Control (TC) interaction with the kernel using netlink.
3 *
4 * Copyright (C) 2022 Shichu Yang
5 *
6 * This file is part of FRR.
7 *
8 * FRR is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2, or (at your option) any
11 * later version.
12 *
13 * FRR is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FRR; see the file COPYING. If not, write to the Free
20 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 * 02111-1307, USA.
22 */
23
24 #include <zebra.h>
25
26 #ifdef HAVE_NETLINK
27
28 #include <linux/if_ether.h>
29 #include <sys/socket.h>
30
31 #include "if.h"
32 #include "prefix.h"
33 #include "vrf.h"
34
35 #include <linux/fib_rules.h>
36 #include <linux/pkt_cls.h>
37 #include <linux/pkt_sched.h>
38 #include "zebra/zserv.h"
39 #include "zebra/zebra_ns.h"
40 #include "zebra/zebra_vrf.h"
41 #include "zebra/rt.h"
42 #include "zebra/interface.h"
43 #include "zebra/debug.h"
44 #include "zebra/rtadv.h"
45 #include "zebra/kernel_netlink.h"
46 #include "zebra/tc_netlink.h"
47 #include "zebra/zebra_errors.h"
48 #include "zebra/zebra_dplane.h"
49 #include "zebra/zebra_trace.h"
50
/* TODO: move these bitflags to zebra_tc.h */

/*
 * Bits tested against the filter bitmap returned by
 * dplane_ctx_tc_get_filter_bm() to decide which flower keys are encoded.
 */
#define TC_FILTER_SRC_IP (1 << 0)
#define TC_FILTER_DST_IP (1 << 1)
#define TC_FILTER_IP_PROTOCOL (1 << 9)

/* Frequency used when /proc/net/psched does not provide a usable value. */
#define TC_FREQ_DEFAULT (100)

/* Base added to the interface index to build the major part of a handle. */
#define TC_MAJOR_BASE (0x1000u)
/* Minor number installed as the HTB default class ("defcls"). */
#define TC_MINOR_NOCLASS (0xffffu)

/* NOTE(review): not referenced in this file view - confirm it is needed. */
#define TC_FILTER_MASK (0x8000u)

/* Number of time units per second used for rate table entries. */
#define TIME_UNITS_PER_SEC (1000000)
/* Time, in time units, needed to transmit s bytes at r bytes per second. */
#define xmittime(r, s) (TIME_UNITS_PER_SEC * ((double)(s) / (double)(r)))
65
66 static uint32_t tc_get_freq(void)
67 {
68 int freq = 0;
69 FILE *fp = fopen("/proc/net/psched", "r");
70
71 if (fp) {
72 uint32_t nom, denom;
73
74 if (fscanf(fp, "%*08x%*08x%08x%08x", &nom, &denom) == 2) {
75 if (nom == 1000000)
76 freq = denom;
77 }
78 fclose(fp);
79 }
80
81 return freq == 0 ? TC_FREQ_DEFAULT : freq;
82 }
83
/*
 * Combine a 16-bit major and a 16-bit minor number into a 32-bit TC handle:
 * major occupies the upper 16 bits, minor the lower 16 bits.
 *
 * The major is widened to uint32_t before shifting; shifting the
 * int-promoted uint16_t would overflow a signed int for majors >= 0x8000.
 */
static inline uint32_t tc_make_handle(uint16_t major, uint16_t minor)
{
	return ((uint32_t)major << 16) | (uint32_t)minor;
}
88
89 static inline uint32_t tc_get_handle(struct zebra_dplane_ctx *ctx,
90 uint16_t minor)
91 {
92 uint16_t major = TC_MAJOR_BASE + (uint16_t)dplane_ctx_get_ifindex(ctx);
93
94 return tc_make_handle(major, minor);
95 }
96
97 static void tc_calc_rate_table(struct tc_ratespec *ratespec, uint32_t *table,
98 uint32_t mtu)
99 {
100 if (mtu == 0)
101 mtu = 2047;
102
103 int cell_log = -1;
104
105 if (cell_log < 0) {
106 cell_log = 0;
107 while ((mtu >> cell_log) > 255)
108 cell_log++;
109 }
110
111 for (int i = 0; i < 256; i++)
112 table[i] = xmittime(ratespec->rate, (i + 1) << cell_log);
113
114 ratespec->cell_align = -1;
115 ratespec->cell_log = cell_log;
116 ratespec->linklayer = TC_LINKLAYER_ETHERNET;
117 }
118
119 static int tc_flower_get_inet_prefix(const struct prefix *prefix,
120 struct inet_prefix *addr)
121 {
122 addr->family = prefix->family;
123
124 if (addr->family == AF_INET) {
125 addr->bytelen = 4;
126 addr->bitlen = prefix->prefixlen;
127 addr->flags = 0;
128 addr->flags |= PREFIXLEN_SPECIFIED;
129 addr->flags |= ADDRTYPE_INET;
130 memcpy(addr->data, prefix->u.val32, sizeof(prefix->u.val32));
131 } else if (addr->family == AF_INET6) {
132 addr->bytelen = 16;
133 addr->bitlen = prefix->prefixlen;
134 addr->flags = 0;
135 addr->flags |= PREFIXLEN_SPECIFIED;
136 addr->flags |= ADDRTYPE_INET;
137 memcpy(addr->data, prefix->u.val, sizeof(prefix->u.val));
138 } else {
139 return -1;
140 }
141
142 return 0;
143 }
144
145 static int tc_flower_get_inet_mask(const struct prefix *prefix,
146 struct inet_prefix *addr)
147 {
148 addr->family = prefix->family;
149
150 if (addr->family == AF_INET) {
151 addr->bytelen = 4;
152 addr->bitlen = prefix->prefixlen;
153 addr->flags = 0;
154 addr->flags |= PREFIXLEN_SPECIFIED;
155 addr->flags |= ADDRTYPE_INET;
156 } else if (addr->family == AF_INET6) {
157 addr->bytelen = 16;
158 addr->bitlen = prefix->prefixlen;
159 addr->flags = 0;
160 addr->flags |= PREFIXLEN_SPECIFIED;
161 addr->flags |= ADDRTYPE_INET;
162 } else {
163 return -1;
164 }
165
166 memset(addr->data, 0xff, addr->bytelen);
167
168 int rest = prefix->prefixlen;
169
170 for (int i = 0; i < addr->bytelen / 4; i++) {
171 if (!rest) {
172 addr->data[i] = 0;
173 } else if (rest / 32 >= 1) {
174 rest -= 32;
175 } else {
176 addr->data[i] <<= 32 - rest;
177 addr->data[i] = htonl(addr->data[i]);
178 rest = 0;
179 }
180 }
181
182 return 0;
183 }
184
185 /*
186 * Traffic control queue discipline encoding (only "htb" supported)
187 */
188 static ssize_t netlink_qdisc_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
189 void *data, size_t datalen)
190 {
191 struct nlsock *nl;
192
193 const char *kind = "htb";
194
195 struct tc_htb_glob htb_glob = {
196 .rate2quantum = 10, .version = 3, .defcls = TC_MINOR_NOCLASS};
197
198 struct rtattr *nest;
199
200 struct {
201 struct nlmsghdr n;
202 struct tcmsg t;
203 char buf[0];
204 } *req = (void *)data;
205
206 if (datalen < sizeof(*req))
207 return 0;
208
209 nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
210
211 memset(req, 0, sizeof(*req));
212
213 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
214 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
215
216 req->n.nlmsg_flags |= NLM_F_REPLACE;
217
218 req->n.nlmsg_type = cmd;
219
220 req->n.nlmsg_pid = nl->snl.nl_pid;
221
222 req->t.tcm_family = AF_UNSPEC;
223 req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
224 req->t.tcm_handle = tc_get_handle(ctx, 0);
225 req->t.tcm_parent = TC_H_ROOT;
226
227 nl_attr_put(&req->n, datalen, TCA_KIND, kind, strlen(kind) + 1);
228
229 nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
230
231 nl_attr_put(&req->n, datalen, TCA_HTB_INIT, &htb_glob,
232 sizeof(htb_glob));
233 nl_attr_nest_end(&req->n, nest);
234
235 return NLMSG_ALIGN(req->n.nlmsg_len);
236 }
237
238 /*
239 * Traffic control class encoding
240 */
241 static ssize_t netlink_tclass_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
242 void *data, size_t datalen)
243 {
244 struct nlsock *nl;
245 struct tc_htb_opt htb_opt = {};
246
247 uint64_t rate, ceil;
248 uint64_t buffer, cbuffer;
249
250 /* TODO: fetch mtu from interface */
251 uint32_t mtu = 0;
252
253 uint32_t rtab[256];
254 uint32_t ctab[256];
255
256 struct rtattr *nest;
257
258 struct {
259 struct nlmsghdr n;
260 struct tcmsg t;
261 char buf[0];
262 } *req = (void *)data;
263
264 if (datalen < sizeof(*req))
265 return 0;
266
267 nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
268
269 memset(req, 0, sizeof(*req));
270
271 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
272 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
273
274 req->n.nlmsg_type = cmd;
275
276 req->n.nlmsg_pid = nl->snl.nl_pid;
277
278 req->t.tcm_family = AF_UNSPEC;
279 req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
280 req->t.tcm_handle = tc_get_handle(ctx, 1);
281 req->t.tcm_parent = tc_get_handle(ctx, 0);
282
283 rate = dplane_ctx_tc_get_rate(ctx);
284 ceil = dplane_ctx_tc_get_ceil(ctx);
285
286 ceil = ceil < rate ? rate : ceil;
287
288 htb_opt.rate.rate = (rate >> 32 != 0) ? ~0U : rate;
289 htb_opt.ceil.rate = (ceil >> 32 != 0) ? ~0U : ceil;
290
291 buffer = rate / tc_get_freq(), cbuffer = ceil / tc_get_freq();
292
293 htb_opt.buffer = buffer;
294 htb_opt.cbuffer = cbuffer;
295
296 tc_calc_rate_table(&htb_opt.rate, rtab, mtu);
297 tc_calc_rate_table(&htb_opt.ceil, ctab, mtu);
298
299 htb_opt.ceil.mpu = htb_opt.rate.mpu = 0;
300 htb_opt.ceil.overhead = htb_opt.rate.overhead = 0;
301
302 nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
303
304 if (rate >> 32 != 0) {
305 nl_attr_put(&req->n, datalen, TCA_HTB_CEIL64, &rate,
306 sizeof(rate));
307 }
308
309 if (ceil >> 32 != 0) {
310 nl_attr_put(&req->n, datalen, TCA_HTB_CEIL64, &ceil,
311 sizeof(ceil));
312 }
313
314 nl_attr_put(&req->n, datalen, TCA_HTB_PARMS, &htb_opt, sizeof(htb_opt));
315
316 nl_attr_put(&req->n, datalen, TCA_HTB_RTAB, rtab, sizeof(rtab));
317 nl_attr_put(&req->n, datalen, TCA_HTB_CTAB, ctab, sizeof(ctab));
318 nl_attr_nest_end(&req->n, nest);
319
320 return NLMSG_ALIGN(req->n.nlmsg_len);
321 }
322
323 /*
324 * Traffic control filter encoding (only "flower" supported)
325 */
326 static ssize_t netlink_tfilter_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
327 void *data, size_t datalen)
328 {
329 struct nlsock *nl;
330 struct rtattr *nest;
331
332 const char *kind = "flower";
333
334 uint16_t priority;
335 uint16_t protocol;
336 uint32_t classid;
337 uint32_t filter_bm;
338 uint32_t flags = 0;
339
340 struct inet_prefix addr;
341
342 struct {
343 struct nlmsghdr n;
344 struct tcmsg t;
345 char buf[0];
346 } *req = (void *)data;
347
348 if (datalen < sizeof(*req))
349 return 0;
350
351 nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
352
353 memset(req, 0, sizeof(*req));
354
355 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
356 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
357
358 req->n.nlmsg_flags |= NLM_F_EXCL;
359
360 req->n.nlmsg_type = cmd;
361
362 req->n.nlmsg_pid = nl->snl.nl_pid;
363
364 req->t.tcm_family = AF_UNSPEC;
365 req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
366
367 /* TODO: priority and layer-3 protocol support */
368 priority = 0;
369 protocol = htons(ETH_P_IP);
370 classid = tc_get_handle(ctx, 1);
371 filter_bm = dplane_ctx_tc_get_filter_bm(ctx);
372
373 req->t.tcm_info = tc_make_handle(priority, protocol);
374
375 req->t.tcm_handle = 1;
376 req->t.tcm_parent = tc_get_handle(ctx, 0);
377
378 nl_attr_put(&req->n, datalen, TCA_KIND, kind, strlen(kind) + 1);
379 nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
380
381 nl_attr_put(&req->n, datalen, TCA_FLOWER_CLASSID, &classid,
382 sizeof(classid));
383
384 if (filter_bm & TC_FILTER_SRC_IP) {
385 const struct prefix *src_p = dplane_ctx_tc_get_src_ip(ctx);
386
387 if (tc_flower_get_inet_prefix(src_p, &addr) != 0)
388 return 0;
389
390 nl_attr_put(&req->n, datalen,
391 (addr.family == AF_INET) ? TCA_FLOWER_KEY_IPV4_SRC
392 : TCA_FLOWER_KEY_IPV6_SRC,
393 addr.data, addr.bytelen);
394
395 if (tc_flower_get_inet_mask(src_p, &addr) != 0)
396 return 0;
397
398 nl_attr_put(&req->n, datalen,
399 (addr.family == AF_INET)
400 ? TCA_FLOWER_KEY_IPV4_SRC_MASK
401 : TCA_FLOWER_KEY_IPV6_SRC_MASK,
402 addr.data, addr.bytelen);
403 }
404
405 if (filter_bm & TC_FILTER_DST_IP) {
406 const struct prefix *dst_p = dplane_ctx_tc_get_dst_ip(ctx);
407
408 if (tc_flower_get_inet_prefix(dst_p, &addr) != 0)
409 return 0;
410
411 nl_attr_put(&req->n, datalen,
412 (addr.family == AF_INET) ? TCA_FLOWER_KEY_IPV4_DST
413 : TCA_FLOWER_KEY_IPV6_DST,
414 addr.data, addr.bytelen);
415
416 if (tc_flower_get_inet_mask(dst_p, &addr) != 0)
417 return 0;
418
419 nl_attr_put(&req->n, datalen,
420 (addr.family == AF_INET)
421 ? TCA_FLOWER_KEY_IPV4_DST_MASK
422 : TCA_FLOWER_KEY_IPV6_DST_MASK,
423 addr.data, addr.bytelen);
424 }
425
426 if (filter_bm & TC_FILTER_IP_PROTOCOL) {
427 nl_attr_put8(&req->n, datalen, TCA_FLOWER_KEY_IP_PROTO,
428 dplane_ctx_tc_get_ip_proto(ctx));
429 }
430
431 nl_attr_put32(&req->n, datalen, TCA_FLOWER_FLAGS, flags);
432
433 nl_attr_put16(&req->n, datalen, TCA_FLOWER_KEY_ETH_TYPE, protocol);
434 nl_attr_nest_end(&req->n, nest);
435
436 return NLMSG_ALIGN(req->n.nlmsg_len);
437 }
438
439 static ssize_t netlink_newqdisc_msg_encoder(struct zebra_dplane_ctx *ctx,
440 void *buf, size_t buflen)
441 {
442 return netlink_qdisc_msg_encode(RTM_NEWQDISC, ctx, buf, buflen);
443 }
444
445 static ssize_t netlink_newtclass_msg_encoder(struct zebra_dplane_ctx *ctx,
446 void *buf, size_t buflen)
447 {
448 return netlink_tclass_msg_encode(RTM_NEWTCLASS, ctx, buf, buflen);
449 }
450
451 static ssize_t netlink_newtfilter_msg_encoder(struct zebra_dplane_ctx *ctx,
452 void *buf, size_t buflen)
453 {
454 return netlink_tfilter_msg_encode(RTM_NEWTFILTER, ctx, buf, buflen);
455 }
456
457 enum netlink_msg_status netlink_put_tc_update_msg(struct nl_batch *bth,
458 struct zebra_dplane_ctx *ctx)
459 {
460 /* TODO: error handling and other actions (delete, replace, ...) */
461
462 netlink_batch_add_msg(bth, ctx, netlink_newqdisc_msg_encoder, false);
463 netlink_batch_add_msg(bth, ctx, netlink_newtclass_msg_encoder, false);
464 return netlink_batch_add_msg(bth, ctx, netlink_newtfilter_msg_encoder,
465 false);
466 }
467
468 #endif /* HAVE_NETLINK */