]>
Commit | Line | Data |
---|---|---|
449a30ed SY |
1 | /* |
2 | * Zebra Traffic Control (TC) interaction with the kernel using netlink. | |
3 | * | |
4 | * Copyright (C) 2022 Shichu Yang | |
5 | * | |
daa602b5 SY |
6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms of the GNU General Public License as published by the Free | |
8 | * Software Foundation; either version 2 of the License, or (at your option) | |
9 | * any later version. | |
449a30ed | 10 | * |
daa602b5 SY |
11 | * This program is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
14 | * more details. | |
449a30ed | 15 | * |
daa602b5 SY |
16 | * You should have received a copy of the GNU General Public License along |
17 | * with this program; see the file COPYING; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
449a30ed SY |
19 | */ |
20 | ||
21 | #include <zebra.h> | |
22 | ||
23 | #ifdef HAVE_NETLINK | |
24 | ||
c317d3f2 SY |
25 | #include <linux/pkt_cls.h> |
26 | #include <linux/pkt_sched.h> | |
83f496bd | 27 | #include <netinet/if_ether.h> |
449a30ed SY |
28 | #include <sys/socket.h> |
29 | ||
30 | #include "if.h" | |
31 | #include "prefix.h" | |
32 | #include "vrf.h" | |
33 | ||
449a30ed SY |
34 | #include "zebra/zserv.h" |
35 | #include "zebra/zebra_ns.h" | |
449a30ed SY |
36 | #include "zebra/rt.h" |
37 | #include "zebra/interface.h" | |
38 | #include "zebra/debug.h" | |
449a30ed SY |
39 | #include "zebra/kernel_netlink.h" |
40 | #include "zebra/tc_netlink.h" | |
41 | #include "zebra/zebra_errors.h" | |
42 | #include "zebra/zebra_dplane.h" | |
c317d3f2 | 43 | #include "zebra/zebra_tc.h" |
449a30ed SY |
44 | #include "zebra/zebra_trace.h" |
45 | ||
449a30ed SY |
/* Frequency returned by tc_get_freq() when /proc/net/psched is unusable */
#define TC_FREQ_DEFAULT (100)

/* Major handle number used for every qdisc/class/filter built in this file */
#define TC_QDISC_MAJOR_ZEBRA (0xbeef0000u)
/* Minor number passed as the HTB default class when a qdisc is created */
#define TC_MINOR_NOCLASS (0xffffu)

/* Number of time units per second that xmittime() results are scaled to */
#define TIME_UNITS_PER_SEC (1000000)
/* Time, in 1/TIME_UNITS_PER_SEC seconds, to transmit size s at rate r */
#define xmittime(r, s) (TIME_UNITS_PER_SEC * ((double)(s) / (double)(r)))
54 | ||
55 | static uint32_t tc_get_freq(void) | |
56 | { | |
57 | int freq = 0; | |
58 | FILE *fp = fopen("/proc/net/psched", "r"); | |
59 | ||
60 | if (fp) { | |
61 | uint32_t nom, denom; | |
62 | ||
63 | if (fscanf(fp, "%*08x%*08x%08x%08x", &nom, &denom) == 2) { | |
64 | if (nom == 1000000) | |
65 | freq = denom; | |
66 | } | |
67 | fclose(fp); | |
68 | } | |
69 | ||
70 | return freq == 0 ? TC_FREQ_DEFAULT : freq; | |
71 | } | |
72 | ||
449a30ed SY |
73 | static void tc_calc_rate_table(struct tc_ratespec *ratespec, uint32_t *table, |
74 | uint32_t mtu) | |
75 | { | |
76 | if (mtu == 0) | |
77 | mtu = 2047; | |
78 | ||
79 | int cell_log = -1; | |
80 | ||
81 | if (cell_log < 0) { | |
82 | cell_log = 0; | |
83 | while ((mtu >> cell_log) > 255) | |
84 | cell_log++; | |
85 | } | |
86 | ||
87 | for (int i = 0; i < 256; i++) | |
88 | table[i] = xmittime(ratespec->rate, (i + 1) << cell_log); | |
89 | ||
90 | ratespec->cell_align = -1; | |
91 | ratespec->cell_log = cell_log; | |
92 | ratespec->linklayer = TC_LINKLAYER_ETHERNET; | |
93 | } | |
94 | ||
95 | static int tc_flower_get_inet_prefix(const struct prefix *prefix, | |
96 | struct inet_prefix *addr) | |
97 | { | |
98 | addr->family = prefix->family; | |
99 | ||
100 | if (addr->family == AF_INET) { | |
101 | addr->bytelen = 4; | |
102 | addr->bitlen = prefix->prefixlen; | |
103 | addr->flags = 0; | |
104 | addr->flags |= PREFIXLEN_SPECIFIED; | |
105 | addr->flags |= ADDRTYPE_INET; | |
106 | memcpy(addr->data, prefix->u.val32, sizeof(prefix->u.val32)); | |
107 | } else if (addr->family == AF_INET6) { | |
108 | addr->bytelen = 16; | |
109 | addr->bitlen = prefix->prefixlen; | |
110 | addr->flags = 0; | |
111 | addr->flags |= PREFIXLEN_SPECIFIED; | |
112 | addr->flags |= ADDRTYPE_INET; | |
113 | memcpy(addr->data, prefix->u.val, sizeof(prefix->u.val)); | |
114 | } else { | |
115 | return -1; | |
116 | } | |
117 | ||
118 | return 0; | |
119 | } | |
120 | ||
121 | static int tc_flower_get_inet_mask(const struct prefix *prefix, | |
122 | struct inet_prefix *addr) | |
123 | { | |
124 | addr->family = prefix->family; | |
125 | ||
126 | if (addr->family == AF_INET) { | |
127 | addr->bytelen = 4; | |
128 | addr->bitlen = prefix->prefixlen; | |
129 | addr->flags = 0; | |
130 | addr->flags |= PREFIXLEN_SPECIFIED; | |
131 | addr->flags |= ADDRTYPE_INET; | |
132 | } else if (addr->family == AF_INET6) { | |
133 | addr->bytelen = 16; | |
134 | addr->bitlen = prefix->prefixlen; | |
135 | addr->flags = 0; | |
136 | addr->flags |= PREFIXLEN_SPECIFIED; | |
137 | addr->flags |= ADDRTYPE_INET; | |
138 | } else { | |
139 | return -1; | |
140 | } | |
141 | ||
142 | memset(addr->data, 0xff, addr->bytelen); | |
143 | ||
144 | int rest = prefix->prefixlen; | |
145 | ||
146 | for (int i = 0; i < addr->bytelen / 4; i++) { | |
147 | if (!rest) { | |
148 | addr->data[i] = 0; | |
149 | } else if (rest / 32 >= 1) { | |
150 | rest -= 32; | |
151 | } else { | |
152 | addr->data[i] <<= 32 - rest; | |
153 | addr->data[i] = htonl(addr->data[i]); | |
154 | rest = 0; | |
155 | } | |
156 | } | |
157 | ||
158 | return 0; | |
159 | } | |
160 | ||
161 | /* | |
162 | * Traffic control queue discipline encoding (only "htb" supported) | |
163 | */ | |
164 | static ssize_t netlink_qdisc_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, | |
165 | void *data, size_t datalen) | |
166 | { | |
167 | struct nlsock *nl; | |
c317d3f2 | 168 | const char *kind_str = NULL; |
449a30ed SY |
169 | |
170 | struct rtattr *nest; | |
171 | ||
172 | struct { | |
173 | struct nlmsghdr n; | |
174 | struct tcmsg t; | |
175 | char buf[0]; | |
176 | } *req = (void *)data; | |
177 | ||
178 | if (datalen < sizeof(*req)) | |
179 | return 0; | |
180 | ||
181 | nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); | |
182 | ||
183 | memset(req, 0, sizeof(*req)); | |
184 | ||
185 | req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); | |
186 | req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; | |
187 | ||
188 | req->n.nlmsg_flags |= NLM_F_REPLACE; | |
189 | ||
190 | req->n.nlmsg_type = cmd; | |
191 | ||
192 | req->n.nlmsg_pid = nl->snl.nl_pid; | |
193 | ||
194 | req->t.tcm_family = AF_UNSPEC; | |
195 | req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx); | |
c317d3f2 SY |
196 | req->t.tcm_info = 0; |
197 | req->t.tcm_handle = 0; | |
449a30ed SY |
198 | req->t.tcm_parent = TC_H_ROOT; |
199 | ||
c317d3f2 SY |
200 | if (cmd == RTM_NEWQDISC) { |
201 | req->t.tcm_handle = TC_H_MAKE(TC_QDISC_MAJOR_ZEBRA, 0); | |
202 | ||
203 | kind_str = dplane_ctx_tc_qdisc_get_kind_str(ctx); | |
449a30ed | 204 | |
c317d3f2 SY |
205 | nl_attr_put(&req->n, datalen, TCA_KIND, kind_str, |
206 | strlen(kind_str) + 1); | |
449a30ed | 207 | |
c317d3f2 SY |
208 | nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS); |
209 | ||
210 | switch (dplane_ctx_tc_qdisc_get_kind(ctx)) { | |
211 | case TC_QDISC_HTB: { | |
212 | struct tc_htb_glob htb_glob = { | |
213 | .rate2quantum = 10, | |
214 | .version = 3, | |
215 | .defcls = TC_MINOR_NOCLASS}; | |
216 | nl_attr_put(&req->n, datalen, TCA_HTB_INIT, &htb_glob, | |
217 | sizeof(htb_glob)); | |
218 | break; | |
219 | } | |
220 | case TC_QDISC_NOQUEUE: | |
221 | break; | |
222 | default: | |
223 | break; | |
224 | /* not implemented */ | |
225 | } | |
226 | ||
227 | nl_attr_nest_end(&req->n, nest); | |
228 | } else { | |
229 | /* ifindex are enough for del/get qdisc */ | |
230 | } | |
449a30ed SY |
231 | |
232 | return NLMSG_ALIGN(req->n.nlmsg_len); | |
233 | } | |
234 | ||
235 | /* | |
236 | * Traffic control class encoding | |
237 | */ | |
238 | static ssize_t netlink_tclass_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, | |
239 | void *data, size_t datalen) | |
240 | { | |
c317d3f2 | 241 | enum dplane_op_e op = dplane_ctx_get_op(ctx); |
449a30ed | 242 | |
c317d3f2 SY |
243 | struct nlsock *nl; |
244 | const char *kind_str = NULL; | |
449a30ed SY |
245 | |
246 | struct rtattr *nest; | |
247 | ||
248 | struct { | |
249 | struct nlmsghdr n; | |
250 | struct tcmsg t; | |
251 | char buf[0]; | |
252 | } *req = (void *)data; | |
253 | ||
254 | if (datalen < sizeof(*req)) | |
255 | return 0; | |
256 | ||
257 | nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); | |
258 | ||
259 | memset(req, 0, sizeof(*req)); | |
260 | ||
261 | req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); | |
262 | req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; | |
263 | ||
c317d3f2 SY |
264 | if (op == DPLANE_OP_TC_CLASS_UPDATE) |
265 | req->n.nlmsg_flags |= NLM_F_REPLACE; | |
266 | ||
449a30ed SY |
267 | req->n.nlmsg_type = cmd; |
268 | ||
269 | req->n.nlmsg_pid = nl->snl.nl_pid; | |
270 | ||
271 | req->t.tcm_family = AF_UNSPEC; | |
272 | req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx); | |
449a30ed | 273 | |
c317d3f2 SY |
274 | req->t.tcm_handle = TC_H_MAKE(TC_QDISC_MAJOR_ZEBRA, |
275 | dplane_ctx_tc_class_get_handle(ctx)); | |
276 | req->t.tcm_parent = TC_H_MAKE(TC_QDISC_MAJOR_ZEBRA, 0); | |
277 | req->t.tcm_info = 0; | |
278 | ||
279 | kind_str = dplane_ctx_tc_class_get_kind_str(ctx); | |
280 | ||
281 | if (op == DPLANE_OP_TC_CLASS_ADD || op == DPLANE_OP_TC_CLASS_UPDATE) { | |
282 | zlog_debug("netlink tclass encoder: op: %s kind: %s handle: %u", | |
283 | op == DPLANE_OP_TC_CLASS_UPDATE ? "update" : "add", | |
284 | kind_str, dplane_ctx_tc_class_get_handle(ctx)); | |
285 | ||
286 | nl_attr_put(&req->n, datalen, TCA_KIND, kind_str, | |
287 | strlen(kind_str) + 1); | |
288 | ||
289 | nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS); | |
449a30ed | 290 | |
c317d3f2 SY |
291 | switch (dplane_ctx_tc_class_get_kind(ctx)) { |
292 | case TC_QDISC_HTB: { | |
293 | struct tc_htb_opt htb_opt = {}; | |
449a30ed | 294 | |
c317d3f2 SY |
295 | uint64_t rate = dplane_ctx_tc_class_get_rate(ctx), |
296 | ceil = dplane_ctx_tc_class_get_ceil(ctx); | |
449a30ed | 297 | |
c317d3f2 | 298 | uint64_t buffer, cbuffer; |
449a30ed | 299 | |
c317d3f2 SY |
300 | /* TODO: fetch mtu from interface */ |
301 | uint32_t mtu = 1500; | |
449a30ed | 302 | |
c317d3f2 SY |
303 | uint32_t rtab[256]; |
304 | uint32_t ctab[256]; | |
449a30ed | 305 | |
c317d3f2 | 306 | ceil = MAX(rate, ceil); |
449a30ed | 307 | |
c317d3f2 SY |
308 | htb_opt.rate.rate = (rate >> 32 != 0) ? ~0U : rate; |
309 | htb_opt.ceil.rate = (ceil >> 32 != 0) ? ~0U : ceil; | |
449a30ed | 310 | |
c317d3f2 SY |
311 | buffer = rate / tc_get_freq() + mtu; |
312 | cbuffer = ceil / tc_get_freq() + mtu; | |
313 | ||
314 | htb_opt.buffer = buffer; | |
315 | htb_opt.cbuffer = cbuffer; | |
316 | ||
317 | tc_calc_rate_table(&htb_opt.rate, rtab, mtu); | |
318 | tc_calc_rate_table(&htb_opt.ceil, ctab, mtu); | |
319 | ||
320 | htb_opt.ceil.mpu = htb_opt.rate.mpu = 0; | |
321 | htb_opt.ceil.overhead = htb_opt.rate.overhead = 0; | |
322 | ||
323 | if (rate >> 32 != 0) { | |
324 | nl_attr_put(&req->n, datalen, TCA_HTB_RATE64, | |
325 | &rate, sizeof(rate)); | |
326 | } | |
327 | ||
328 | if (ceil >> 32 != 0) { | |
329 | nl_attr_put(&req->n, datalen, TCA_HTB_CEIL64, | |
330 | &ceil, sizeof(ceil)); | |
331 | } | |
332 | ||
333 | nl_attr_put(&req->n, datalen, TCA_HTB_PARMS, &htb_opt, | |
334 | sizeof(htb_opt)); | |
335 | ||
336 | nl_attr_put(&req->n, datalen, TCA_HTB_RTAB, rtab, | |
337 | sizeof(rtab)); | |
338 | nl_attr_put(&req->n, datalen, TCA_HTB_CTAB, ctab, | |
339 | sizeof(ctab)); | |
340 | break; | |
341 | } | |
342 | default: | |
343 | break; | |
344 | } | |
345 | ||
346 | nl_attr_nest_end(&req->n, nest); | |
449a30ed SY |
347 | } |
348 | ||
c317d3f2 SY |
349 | return NLMSG_ALIGN(req->n.nlmsg_len); |
350 | } | |
351 | ||
/*
 * Pick the flower key attribute used to match a single port.
 *
 * ip_proto: IP protocol number (TCP, UDP and SCTP are recognised)
 * src:      true selects the source-port key, false the destination-port key
 *
 * Returns the TCA_FLOWER_KEY_* attribute type, or -1 for any other protocol.
 */
static int netlink_tfilter_flower_port_type(uint8_t ip_proto, bool src)
{
	switch (ip_proto) {
	case IPPROTO_TCP:
		if (src)
			return TCA_FLOWER_KEY_TCP_SRC;
		return TCA_FLOWER_KEY_TCP_DST;
	case IPPROTO_UDP:
		if (src)
			return TCA_FLOWER_KEY_UDP_SRC;
		return TCA_FLOWER_KEY_UDP_DST;
	case IPPROTO_SCTP:
		if (src)
			return TCA_FLOWER_KEY_SCTP_SRC;
		return TCA_FLOWER_KEY_SCTP_DST;
	default:
		return -1;
	}
}
363 | ||
364 | static void netlink_tfilter_flower_put_options(struct nlmsghdr *n, | |
365 | size_t datalen, | |
366 | struct zebra_dplane_ctx *ctx) | |
367 | { | |
368 | struct inet_prefix addr; | |
369 | uint32_t flags = 0, classid; | |
370 | uint8_t protocol = htons(dplane_ctx_tc_filter_get_eth_proto(ctx)); | |
371 | uint32_t filter_bm = dplane_ctx_tc_filter_get_filter_bm(ctx); | |
372 | ||
373 | if (filter_bm & TC_FLOWER_SRC_IP) { | |
374 | const struct prefix *src_p = | |
375 | dplane_ctx_tc_filter_get_src_ip(ctx); | |
376 | ||
377 | if (tc_flower_get_inet_prefix(src_p, &addr) != 0) | |
378 | return; | |
379 | ||
380 | nl_attr_put(n, datalen, | |
381 | (addr.family == AF_INET) ? TCA_FLOWER_KEY_IPV4_SRC | |
382 | : TCA_FLOWER_KEY_IPV6_SRC, | |
383 | addr.data, addr.bytelen); | |
384 | ||
385 | if (tc_flower_get_inet_mask(src_p, &addr) != 0) | |
386 | return; | |
387 | ||
388 | nl_attr_put(n, datalen, | |
389 | (addr.family == AF_INET) | |
390 | ? TCA_FLOWER_KEY_IPV4_SRC_MASK | |
391 | : TCA_FLOWER_KEY_IPV6_SRC_MASK, | |
392 | addr.data, addr.bytelen); | |
449a30ed SY |
393 | } |
394 | ||
c317d3f2 SY |
395 | if (filter_bm & TC_FLOWER_DST_IP) { |
396 | const struct prefix *dst_p = | |
397 | dplane_ctx_tc_filter_get_dst_ip(ctx); | |
449a30ed | 398 | |
c317d3f2 SY |
399 | if (tc_flower_get_inet_prefix(dst_p, &addr) != 0) |
400 | return; | |
449a30ed | 401 | |
c317d3f2 SY |
402 | nl_attr_put(n, datalen, |
403 | (addr.family == AF_INET) ? TCA_FLOWER_KEY_IPV4_DST | |
404 | : TCA_FLOWER_KEY_IPV6_DST, | |
405 | addr.data, addr.bytelen); | |
406 | ||
407 | if (tc_flower_get_inet_mask(dst_p, &addr) != 0) | |
408 | return; | |
409 | ||
410 | nl_attr_put(n, datalen, | |
411 | (addr.family == AF_INET) | |
412 | ? TCA_FLOWER_KEY_IPV4_DST_MASK | |
413 | : TCA_FLOWER_KEY_IPV6_DST_MASK, | |
414 | addr.data, addr.bytelen); | |
415 | } | |
416 | ||
417 | if (filter_bm & TC_FLOWER_IP_PROTOCOL) { | |
418 | nl_attr_put8(n, datalen, TCA_FLOWER_KEY_IP_PROTO, | |
419 | dplane_ctx_tc_filter_get_ip_proto(ctx)); | |
420 | } | |
421 | ||
422 | if (filter_bm & TC_FLOWER_SRC_PORT) { | |
423 | uint16_t min, max; | |
424 | ||
425 | min = dplane_ctx_tc_filter_get_src_port_min(ctx); | |
426 | max = dplane_ctx_tc_filter_get_src_port_max(ctx); | |
427 | ||
428 | if (max > min) { | |
429 | nl_attr_put16(n, datalen, TCA_FLOWER_KEY_PORT_SRC_MIN, | |
430 | htons(min)); | |
431 | ||
432 | nl_attr_put16(n, datalen, TCA_FLOWER_KEY_PORT_SRC_MAX, | |
433 | htons(max)); | |
434 | } else { | |
435 | int type = netlink_tfilter_flower_port_type( | |
436 | dplane_ctx_tc_filter_get_ip_proto(ctx), true); | |
437 | ||
438 | if (type < 0) | |
439 | return; | |
440 | ||
441 | nl_attr_put16(n, datalen, type, htons(min)); | |
442 | } | |
443 | } | |
444 | ||
445 | if (filter_bm & TC_FLOWER_DST_PORT) { | |
446 | uint16_t min = dplane_ctx_tc_filter_get_dst_port_min(ctx), | |
447 | max = dplane_ctx_tc_filter_get_dst_port_max(ctx); | |
448 | ||
449 | if (max > min) { | |
450 | nl_attr_put16(n, datalen, TCA_FLOWER_KEY_PORT_DST_MIN, | |
451 | htons(min)); | |
452 | ||
453 | nl_attr_put16(n, datalen, TCA_FLOWER_KEY_PORT_DST_MAX, | |
454 | htons(max)); | |
455 | } else { | |
456 | int type = netlink_tfilter_flower_port_type( | |
457 | dplane_ctx_tc_filter_get_ip_proto(ctx), false); | |
458 | ||
459 | if (type < 0) | |
460 | return; | |
461 | ||
462 | nl_attr_put16(n, datalen, type, htons(min)); | |
463 | } | |
464 | } | |
465 | ||
466 | if (filter_bm & TC_FLOWER_DSFIELD) { | |
467 | nl_attr_put8(n, datalen, TCA_FLOWER_KEY_IP_TOS, | |
468 | dplane_ctx_tc_filter_get_dsfield(ctx)); | |
469 | nl_attr_put8(n, datalen, TCA_FLOWER_KEY_IP_TOS_MASK, | |
470 | dplane_ctx_tc_filter_get_dsfield_mask(ctx)); | |
471 | } | |
472 | ||
473 | classid = TC_H_MAKE(TC_QDISC_MAJOR_ZEBRA, | |
474 | dplane_ctx_tc_filter_get_classid(ctx)); | |
475 | nl_attr_put32(n, datalen, TCA_FLOWER_CLASSID, classid); | |
476 | ||
477 | nl_attr_put32(n, datalen, TCA_FLOWER_FLAGS, flags); | |
478 | ||
479 | nl_attr_put16(n, datalen, TCA_FLOWER_KEY_ETH_TYPE, protocol); | |
449a30ed SY |
480 | } |
481 | ||
482 | /* | |
c317d3f2 | 483 | * Traffic control filter encoding |
449a30ed SY |
484 | */ |
485 | static ssize_t netlink_tfilter_msg_encode(int cmd, struct zebra_dplane_ctx *ctx, | |
486 | void *data, size_t datalen) | |
487 | { | |
c317d3f2 SY |
488 | enum dplane_op_e op = dplane_ctx_get_op(ctx); |
489 | ||
449a30ed | 490 | struct nlsock *nl; |
c317d3f2 | 491 | const char *kind_str = NULL; |
449a30ed | 492 | |
c317d3f2 | 493 | struct rtattr *nest; |
449a30ed SY |
494 | |
495 | uint16_t priority; | |
496 | uint16_t protocol; | |
449a30ed SY |
497 | |
498 | struct { | |
499 | struct nlmsghdr n; | |
500 | struct tcmsg t; | |
501 | char buf[0]; | |
502 | } *req = (void *)data; | |
503 | ||
504 | if (datalen < sizeof(*req)) | |
505 | return 0; | |
506 | ||
507 | nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx)); | |
508 | ||
509 | memset(req, 0, sizeof(*req)); | |
510 | ||
511 | req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); | |
512 | req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; | |
513 | ||
c317d3f2 SY |
514 | if (op == DPLANE_OP_TC_FILTER_UPDATE) |
515 | req->n.nlmsg_flags |= NLM_F_REPLACE; | |
449a30ed SY |
516 | |
517 | req->n.nlmsg_type = cmd; | |
518 | ||
519 | req->n.nlmsg_pid = nl->snl.nl_pid; | |
520 | ||
521 | req->t.tcm_family = AF_UNSPEC; | |
522 | req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx); | |
523 | ||
c317d3f2 SY |
524 | priority = dplane_ctx_tc_filter_get_priority(ctx); |
525 | protocol = htons(dplane_ctx_tc_filter_get_eth_proto(ctx)); | |
526 | ||
527 | req->t.tcm_info = TC_H_MAKE(priority << 16, protocol); | |
528 | req->t.tcm_handle = dplane_ctx_tc_filter_get_handle(ctx); | |
529 | req->t.tcm_parent = TC_H_MAKE(TC_QDISC_MAJOR_ZEBRA, 0); | |
530 | ||
531 | kind_str = dplane_ctx_tc_filter_get_kind_str(ctx); | |
532 | ||
533 | if (op == DPLANE_OP_TC_FILTER_ADD || op == DPLANE_OP_TC_FILTER_UPDATE) { | |
534 | nl_attr_put(&req->n, datalen, TCA_KIND, kind_str, | |
535 | strlen(kind_str) + 1); | |
536 | ||
537 | zlog_debug( | |
538 | "netlink tfilter encoder: op: %s priority: %u protocol: %u kind: %s handle: %u filter_bm: %u ip_proto: %u", | |
539 | op == DPLANE_OP_TC_FILTER_UPDATE ? "update" : "add", | |
540 | priority, protocol, kind_str, | |
541 | dplane_ctx_tc_filter_get_handle(ctx), | |
542 | dplane_ctx_tc_filter_get_filter_bm(ctx), | |
543 | dplane_ctx_tc_filter_get_ip_proto(ctx)); | |
544 | ||
545 | nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS); | |
546 | switch (dplane_ctx_tc_filter_get_kind(ctx)) { | |
547 | case TC_FILTER_FLOWER: { | |
548 | netlink_tfilter_flower_put_options(&req->n, datalen, | |
549 | ctx); | |
550 | break; | |
551 | } | |
552 | default: | |
553 | break; | |
554 | } | |
555 | nl_attr_nest_end(&req->n, nest); | |
556 | } | |
557 | ||
558 | return NLMSG_ALIGN(req->n.nlmsg_len); | |
559 | } | |
560 | ||
561 | static ssize_t netlink_newqdisc_msg_encoder(struct zebra_dplane_ctx *ctx, | |
562 | void *buf, size_t buflen) | |
563 | { | |
564 | return netlink_qdisc_msg_encode(RTM_NEWQDISC, ctx, buf, buflen); | |
565 | } | |
566 | ||
567 | static ssize_t netlink_delqdisc_msg_encoder(struct zebra_dplane_ctx *ctx, | |
568 | void *buf, size_t buflen) | |
569 | { | |
570 | return netlink_qdisc_msg_encode(RTM_DELQDISC, ctx, buf, buflen); | |
571 | } | |
572 | ||
573 | static ssize_t netlink_newtclass_msg_encoder(struct zebra_dplane_ctx *ctx, | |
574 | void *buf, size_t buflen) | |
575 | { | |
576 | return netlink_tclass_msg_encode(RTM_NEWTCLASS, ctx, buf, buflen); | |
577 | } | |
449a30ed | 578 | |
c317d3f2 SY |
579 | static ssize_t netlink_deltclass_msg_encoder(struct zebra_dplane_ctx *ctx, |
580 | void *buf, size_t buflen) | |
581 | { | |
582 | return netlink_tclass_msg_encode(RTM_DELTCLASS, ctx, buf, buflen); | |
583 | } | |
449a30ed | 584 | |
c317d3f2 SY |
585 | static ssize_t netlink_newtfilter_msg_encoder(struct zebra_dplane_ctx *ctx, |
586 | void *buf, size_t buflen) | |
587 | { | |
588 | return netlink_tfilter_msg_encode(RTM_NEWTFILTER, ctx, buf, buflen); | |
589 | } | |
449a30ed | 590 | |
c317d3f2 SY |
591 | static ssize_t netlink_deltfilter_msg_encoder(struct zebra_dplane_ctx *ctx, |
592 | void *buf, size_t buflen) | |
593 | { | |
594 | return netlink_tfilter_msg_encode(RTM_DELTFILTER, ctx, buf, buflen); | |
595 | } | |
449a30ed | 596 | |
c317d3f2 SY |
597 | enum netlink_msg_status |
598 | netlink_put_tc_qdisc_update_msg(struct nl_batch *bth, | |
599 | struct zebra_dplane_ctx *ctx) | |
600 | { | |
601 | enum dplane_op_e op; | |
602 | enum netlink_msg_status ret; | |
449a30ed | 603 | |
c317d3f2 | 604 | op = dplane_ctx_get_op(ctx); |
449a30ed | 605 | |
c317d3f2 SY |
606 | if (op == DPLANE_OP_TC_QDISC_INSTALL) { |
607 | ret = netlink_batch_add_msg( | |
608 | bth, ctx, netlink_newqdisc_msg_encoder, false); | |
609 | } else if (op == DPLANE_OP_TC_QDISC_UNINSTALL) { | |
610 | ret = netlink_batch_add_msg( | |
611 | bth, ctx, netlink_delqdisc_msg_encoder, false); | |
612 | } else { | |
613 | return FRR_NETLINK_ERROR; | |
614 | } | |
449a30ed | 615 | |
c317d3f2 SY |
616 | return ret; |
617 | } | |
449a30ed | 618 | |
c317d3f2 SY |
619 | enum netlink_msg_status |
620 | netlink_put_tc_class_update_msg(struct nl_batch *bth, | |
621 | struct zebra_dplane_ctx *ctx) | |
622 | { | |
623 | enum dplane_op_e op; | |
624 | enum netlink_msg_status ret; | |
449a30ed | 625 | |
c317d3f2 SY |
626 | op = dplane_ctx_get_op(ctx); |
627 | ||
628 | if (op == DPLANE_OP_TC_CLASS_ADD || op == DPLANE_OP_TC_CLASS_UPDATE) { | |
629 | ret = netlink_batch_add_msg( | |
630 | bth, ctx, netlink_newtclass_msg_encoder, false); | |
631 | } else if (op == DPLANE_OP_TC_CLASS_DELETE) { | |
632 | ret = netlink_batch_add_msg( | |
633 | bth, ctx, netlink_deltclass_msg_encoder, false); | |
634 | } else { | |
635 | return FRR_NETLINK_ERROR; | |
449a30ed SY |
636 | } |
637 | ||
c317d3f2 SY |
638 | return ret; |
639 | } | |
449a30ed | 640 | |
c317d3f2 SY |
641 | enum netlink_msg_status |
642 | netlink_put_tc_filter_update_msg(struct nl_batch *bth, | |
643 | struct zebra_dplane_ctx *ctx) | |
644 | { | |
645 | enum dplane_op_e op; | |
646 | enum netlink_msg_status ret; | |
647 | ||
648 | op = dplane_ctx_get_op(ctx); | |
649 | ||
650 | if (op == DPLANE_OP_TC_FILTER_ADD) { | |
651 | ret = netlink_batch_add_msg( | |
652 | bth, ctx, netlink_newtfilter_msg_encoder, false); | |
653 | } else if (op == DPLANE_OP_TC_FILTER_UPDATE) { | |
654 | /* | |
655 | * Replace will fail if either filter type or the number of | |
656 | * filter options is changed, so DEL then NEW | |
657 | * | |
658 | * TFILTER may have refs to TCLASS. | |
659 | */ | |
660 | ||
661 | (void)netlink_batch_add_msg( | |
662 | bth, ctx, netlink_deltfilter_msg_encoder, false); | |
663 | ret = netlink_batch_add_msg( | |
664 | bth, ctx, netlink_newtfilter_msg_encoder, false); | |
665 | } else if (op == DPLANE_OP_TC_FILTER_DELETE) { | |
666 | ret = netlink_batch_add_msg( | |
667 | bth, ctx, netlink_deltfilter_msg_encoder, false); | |
668 | } else { | |
669 | return FRR_NETLINK_ERROR; | |
670 | } | |
449a30ed | 671 | |
c317d3f2 SY |
672 | return ret; |
673 | } | |
449a30ed | 674 | |
c317d3f2 SY |
675 | /* |
676 | * Request filters from the kernel | |
677 | */ | |
678 | static int netlink_request_filters(struct zebra_ns *zns, int family, int type, | |
679 | ifindex_t ifindex) | |
680 | { | |
681 | struct { | |
682 | struct nlmsghdr n; | |
683 | struct tcmsg tc; | |
684 | } req; | |
449a30ed | 685 | |
c317d3f2 SY |
686 | memset(&req, 0, sizeof(req)); |
687 | req.n.nlmsg_type = type; | |
688 | req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; | |
689 | req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); | |
690 | req.tc.tcm_family = family; | |
691 | req.tc.tcm_ifindex = ifindex; | |
692 | ||
693 | return netlink_request(&zns->netlink_cmd, &req); | |
694 | } | |
695 | ||
696 | /* | |
697 | * Request queue discipline from the kernel | |
698 | */ | |
699 | static int netlink_request_qdiscs(struct zebra_ns *zns, int family, int type) | |
700 | { | |
701 | struct { | |
702 | struct nlmsghdr n; | |
703 | struct tcmsg tc; | |
704 | } req; | |
705 | ||
706 | memset(&req, 0, sizeof(req)); | |
707 | req.n.nlmsg_type = type; | |
708 | req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; | |
709 | req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); | |
710 | req.tc.tcm_family = family; | |
711 | ||
712 | return netlink_request(&zns->netlink_cmd, &req); | |
713 | } | |
714 | ||
715 | int netlink_qdisc_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) | |
716 | { | |
717 | struct tcmsg *tcm; | |
718 | struct zebra_tc_qdisc qdisc = {}; | |
719 | ||
720 | int len; | |
721 | struct rtattr *tb[TCA_MAX + 1]; | |
722 | ||
723 | frrtrace(3, frr_zebra, netlink_tc_qdisc_change, h, ns_id, startup); | |
724 | ||
725 | len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct tcmsg)); | |
726 | ||
727 | if (len < 0) { | |
728 | zlog_err( | |
729 | "%s: Message received from netlink is of a broken size %d %zu", | |
730 | __func__, h->nlmsg_len, | |
731 | (size_t)NLMSG_LENGTH(sizeof(struct tcmsg))); | |
732 | return -1; | |
733 | } | |
734 | ||
735 | tcm = NLMSG_DATA(h); | |
736 | netlink_parse_rtattr(tb, TCA_MAX, TCA_RTA(tcm), len); | |
737 | ||
738 | const char *kind_str = (const char *)RTA_DATA(tb[TCA_KIND]); | |
739 | ||
740 | enum tc_qdisc_kind kind = tc_qdisc_str2kind(kind_str); | |
741 | ||
742 | qdisc.qdisc.ifindex = tcm->tcm_ifindex; | |
743 | ||
744 | switch (kind) { | |
745 | case TC_QDISC_NOQUEUE: | |
746 | /* "noqueue" is the default qdisc */ | |
747 | break; | |
748 | default: | |
749 | break; | |
449a30ed SY |
750 | } |
751 | ||
c317d3f2 SY |
752 | if (tb[TCA_OPTIONS] != NULL) { |
753 | struct rtattr *options[TCA_HTB_MAX + 1]; | |
754 | ||
755 | netlink_parse_rtattr_nested(options, TCA_HTB_MAX, | |
756 | tb[TCA_OPTIONS]); | |
757 | ||
758 | /* TODO: more details */ | |
759 | /* struct tc_htb_glob *glob = RTA_DATA(options[TCA_HTB_INIT]); | |
760 | */ | |
449a30ed SY |
761 | } |
762 | ||
c317d3f2 SY |
763 | if (h->nlmsg_type == RTM_NEWQDISC) { |
764 | if (startup && | |
765 | TC_H_MAJ(tcm->tcm_handle) == TC_QDISC_MAJOR_ZEBRA) { | |
766 | enum zebra_dplane_result ret; | |
449a30ed | 767 | |
c317d3f2 | 768 | ret = dplane_tc_qdisc_uninstall(&qdisc); |
449a30ed | 769 | |
c317d3f2 SY |
770 | zlog_debug("%s: %s leftover qdisc: ifindex %d kind %s", |
771 | __func__, | |
772 | ((ret == ZEBRA_DPLANE_REQUEST_FAILURE) | |
773 | ? "Failed to remove" | |
774 | : "Removed"), | |
775 | qdisc.qdisc.ifindex, kind_str); | |
776 | } | |
777 | } | |
778 | ||
779 | return 0; | |
449a30ed SY |
780 | } |
781 | ||
c317d3f2 | 782 | int netlink_tclass_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) |
449a30ed | 783 | { |
c317d3f2 SY |
784 | struct tcmsg *tcm; |
785 | ||
786 | int len; | |
787 | struct rtattr *tb[TCA_MAX + 1]; | |
788 | ||
789 | frrtrace(3, frr_zebra, netlink_tc_class_change, h, ns_id, startup); | |
790 | ||
791 | len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct tcmsg)); | |
792 | ||
793 | if (len < 0) { | |
794 | zlog_err( | |
795 | "%s: Message received from netlink is of a broken size %d %zu", | |
796 | __func__, h->nlmsg_len, | |
797 | (size_t)NLMSG_LENGTH(sizeof(struct tcmsg))); | |
798 | return -1; | |
799 | } | |
800 | ||
801 | tcm = NLMSG_DATA(h); | |
802 | netlink_parse_rtattr(tb, TCA_MAX, TCA_RTA(tcm), len); | |
803 | ||
804 | ||
805 | if (tb[TCA_OPTIONS] != NULL) { | |
806 | struct rtattr *options[TCA_HTB_MAX + 1]; | |
807 | ||
808 | netlink_parse_rtattr_nested(options, TCA_HTB_MAX, | |
809 | tb[TCA_OPTIONS]); | |
810 | ||
811 | /* TODO: more details */ | |
812 | /* struct tc_htb_opt *opt = RTA_DATA(options[TCA_HTB_PARMS]); */ | |
813 | } | |
814 | ||
815 | return 0; | |
449a30ed SY |
816 | } |
817 | ||
c317d3f2 | 818 | int netlink_tfilter_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) |
449a30ed | 819 | { |
c317d3f2 SY |
820 | struct tcmsg *tcm; |
821 | ||
822 | int len; | |
823 | struct rtattr *tb[TCA_MAX + 1]; | |
824 | ||
825 | frrtrace(3, frr_zebra, netlink_tc_filter_change, h, ns_id, startup); | |
826 | ||
827 | len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct tcmsg)); | |
828 | ||
829 | if (len < 0) { | |
830 | zlog_err( | |
831 | "%s: Message received from netlink is of a broken size %d %zu", | |
832 | __func__, h->nlmsg_len, | |
833 | (size_t)NLMSG_LENGTH(sizeof(struct tcmsg))); | |
834 | return -1; | |
835 | } | |
836 | ||
837 | tcm = NLMSG_DATA(h); | |
838 | netlink_parse_rtattr(tb, TCA_MAX, TCA_RTA(tcm), len); | |
839 | ||
840 | return 0; | |
449a30ed SY |
841 | } |
842 | ||
c317d3f2 | 843 | int netlink_qdisc_read(struct zebra_ns *zns) |
449a30ed | 844 | { |
c317d3f2 SY |
845 | int ret; |
846 | struct zebra_dplane_info dp_info; | |
847 | ||
848 | zebra_dplane_info_from_zns(&dp_info, zns, true); | |
849 | ||
850 | ret = netlink_request_qdiscs(zns, AF_UNSPEC, RTM_GETQDISC); | |
851 | if (ret < 0) | |
852 | return ret; | |
853 | ||
854 | ret = netlink_parse_info(netlink_qdisc_change, &zns->netlink_cmd, | |
855 | &dp_info, 0, true); | |
856 | if (ret < 0) | |
857 | return ret; | |
858 | ||
859 | return 0; | |
449a30ed SY |
860 | } |
861 | ||
c317d3f2 | 862 | int netlink_tfilter_read_for_interface(struct zebra_ns *zns, ifindex_t ifindex) |
449a30ed | 863 | { |
c317d3f2 SY |
864 | int ret; |
865 | struct zebra_dplane_info dp_info; | |
866 | ||
867 | zebra_dplane_info_from_zns(&dp_info, zns, true); | |
449a30ed | 868 | |
c317d3f2 SY |
869 | ret = netlink_request_filters(zns, AF_UNSPEC, RTM_GETTFILTER, ifindex); |
870 | if (ret < 0) | |
871 | return ret; | |
872 | ||
873 | ret = netlink_parse_info(netlink_tfilter_change, &zns->netlink_cmd, | |
874 | &dp_info, 0, true); | |
875 | if (ret < 0) | |
876 | return ret; | |
877 | ||
878 | return 0; | |
449a30ed SY |
879 | } |
880 | ||
881 | #endif /* HAVE_NETLINK */ |