/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2018 6WIND S.A.
 * Copyright 2018 Mellanox Technologies, Ltd
 */

#include <assert.h>
#include <errno.h>
#include <libmnl/libmnl.h>
#include <linux/if_ether.h>
#include <linux/netlink.h>
#include <linux/pkt_cls.h>
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/tc_act/tc_gact.h>
#include <linux/tc_act/tc_mirred.h>
#include <netinet/in.h>
#include <stdalign.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/socket.h>

#include <rte_byteorder.h>
#include <rte_errno.h>
#include <rte_ether.h>
#include <rte_flow.h>

#include "mlx5.h"
#include "mlx5_autoconf.h"

#ifdef HAVE_TC_ACT_VLAN

#include <linux/tc_act/tc_vlan.h>

#else /* HAVE_TC_ACT_VLAN */

#define TCA_VLAN_ACT_POP 1
#define TCA_VLAN_ACT_PUSH 2
#define TCA_VLAN_ACT_MODIFY 3
#define TCA_VLAN_PARMS 2
#define TCA_VLAN_PUSH_VLAN_ID 3
#define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
#define TCA_VLAN_PAD 5
#define TCA_VLAN_PUSH_VLAN_PRIORITY 6

struct tc_vlan {
	tc_gen;
	int v_action;
};

#endif /* HAVE_TC_ACT_VLAN */

/* Normally found in linux/netlink.h. */
#ifndef NETLINK_CAP_ACK
#define NETLINK_CAP_ACK 10
#endif

/* Normally found in linux/pkt_sched.h. */
#ifndef TC_H_MIN_INGRESS
#define TC_H_MIN_INGRESS 0xfff2u
#endif

/* Normally found in linux/pkt_cls.h. */
#ifndef TCA_CLS_FLAGS_SKIP_SW
#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
#endif
#ifndef HAVE_TCA_FLOWER_ACT
#define TCA_FLOWER_ACT 3
#endif
#ifndef HAVE_TCA_FLOWER_FLAGS
#define TCA_FLOWER_FLAGS 22
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
#define TCA_FLOWER_KEY_ETH_TYPE 8
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
#define TCA_FLOWER_KEY_ETH_DST 4
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
#define TCA_FLOWER_KEY_ETH_DST_MASK 5
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
#define TCA_FLOWER_KEY_ETH_SRC 6
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
#define TCA_FLOWER_KEY_ETH_SRC_MASK 7
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
#define TCA_FLOWER_KEY_IP_PROTO 9
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
#define TCA_FLOWER_KEY_IPV4_SRC 10
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
#define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
#define TCA_FLOWER_KEY_IPV4_DST 12
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
#define TCA_FLOWER_KEY_IPV4_DST_MASK 13
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
#define TCA_FLOWER_KEY_IPV6_SRC 14
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
#define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
#define TCA_FLOWER_KEY_IPV6_DST 16
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
#define TCA_FLOWER_KEY_IPV6_DST_MASK 17
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
#define TCA_FLOWER_KEY_TCP_SRC 18
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
#define TCA_FLOWER_KEY_TCP_SRC_MASK 35
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
#define TCA_FLOWER_KEY_TCP_DST 19
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
#define TCA_FLOWER_KEY_TCP_DST_MASK 36
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
#define TCA_FLOWER_KEY_UDP_SRC 20
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
#define TCA_FLOWER_KEY_UDP_SRC_MASK 37
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
#define TCA_FLOWER_KEY_UDP_DST 21
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
#define TCA_FLOWER_KEY_UDP_DST_MASK 38
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
#define TCA_FLOWER_KEY_VLAN_ID 23
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
#define TCA_FLOWER_KEY_VLAN_PRIO 24
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
#endif

/** Parser state definitions for mlx5_nl_flow_trans[]. */
enum mlx5_nl_flow_trans {
	INVALID,
	BACK,
	ATTR,
	PATTERN,
	ITEM_VOID,
	ITEM_PORT_ID,
	ITEM_ETH,
	ITEM_VLAN,
	ITEM_IPV4,
	ITEM_IPV6,
	ITEM_TCP,
	ITEM_UDP,
	ACTIONS,
	ACTION_VOID,
	ACTION_PORT_ID,
	ACTION_DROP,
	ACTION_OF_POP_VLAN,
	ACTION_OF_PUSH_VLAN,
	ACTION_OF_SET_VLAN_VID,
	ACTION_OF_SET_VLAN_PCP,
	END,
};

#define TRANS(...) (const enum mlx5_nl_flow_trans []){ __VA_ARGS__, INVALID, }
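
/*
 * TRANS() builds an anonymous, INVALID-terminated array through a C99
 * compound literal; e.g. TRANS(ITEM_TCP, ITEM_UDP) expands to:
 *
 *  (const enum mlx5_nl_flow_trans []){ ITEM_TCP, ITEM_UDP, INVALID, }
 */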

#define PATTERN_COMMON \
	ITEM_VOID, ITEM_PORT_ID, ACTIONS
#define ACTIONS_COMMON \
	ACTION_VOID, ACTION_OF_POP_VLAN, ACTION_OF_PUSH_VLAN, \
	ACTION_OF_SET_VLAN_VID, ACTION_OF_SET_VLAN_PCP
#define ACTIONS_FATE \
	ACTION_PORT_ID, ACTION_DROP

/** Parser state transitions used by mlx5_nl_flow_transpose(). */
static const enum mlx5_nl_flow_trans *const mlx5_nl_flow_trans[] = {
	[INVALID] = NULL,
	[BACK] = NULL,
	[ATTR] = TRANS(PATTERN),
	[PATTERN] = TRANS(ITEM_ETH, PATTERN_COMMON),
	[ITEM_VOID] = TRANS(BACK),
	[ITEM_PORT_ID] = TRANS(BACK),
	[ITEM_ETH] = TRANS(ITEM_IPV4, ITEM_IPV6, ITEM_VLAN, PATTERN_COMMON),
	[ITEM_VLAN] = TRANS(ITEM_IPV4, ITEM_IPV6, PATTERN_COMMON),
	[ITEM_IPV4] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
	[ITEM_IPV6] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
	[ITEM_TCP] = TRANS(PATTERN_COMMON),
	[ITEM_UDP] = TRANS(PATTERN_COMMON),
	[ACTIONS] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
	[ACTION_VOID] = TRANS(BACK),
	[ACTION_PORT_ID] = TRANS(ACTION_VOID, END),
	[ACTION_DROP] = TRANS(ACTION_VOID, END),
	[ACTION_OF_POP_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
	[ACTION_OF_PUSH_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
	[ACTION_OF_SET_VLAN_VID] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
	[ACTION_OF_SET_VLAN_PCP] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
	[END] = NULL,
};
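
/*
 * The transition table above drives a small state machine in
 * mlx5_nl_flow_transpose(). For instance, a rule matching
 * "eth / ipv4 / tcp" with a drop action walks through:
 *
 *  ATTR -> PATTERN -> ITEM_ETH -> ITEM_IPV4 -> ITEM_TCP
 *       -> ACTIONS -> ACTION_DROP -> END
 *
 * BACK restores the candidate list that led to the current state, which
 * lets VOID items and actions be consumed without advancing the parser.
 */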

/** Empty masks for known item types. */
static const union {
	struct rte_flow_item_port_id port_id;
	struct rte_flow_item_eth eth;
	struct rte_flow_item_vlan vlan;
	struct rte_flow_item_ipv4 ipv4;
	struct rte_flow_item_ipv6 ipv6;
	struct rte_flow_item_tcp tcp;
	struct rte_flow_item_udp udp;
} mlx5_nl_flow_mask_empty;

/** Supported masks for known item types. */
static const struct {
	struct rte_flow_item_port_id port_id;
	struct rte_flow_item_eth eth;
	struct rte_flow_item_vlan vlan;
	struct rte_flow_item_ipv4 ipv4;
	struct rte_flow_item_ipv6 ipv6;
	struct rte_flow_item_tcp tcp;
	struct rte_flow_item_udp udp;
} mlx5_nl_flow_mask_supported = {
	.port_id = {
		.id = 0xffffffff,
	},
	.eth = {
		.type = RTE_BE16(0xffff),
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	},
	.vlan = {
		/* PCP and VID only, no DEI. */
		.tci = RTE_BE16(0xefff),
		.inner_type = RTE_BE16(0xffff),
	},
	.ipv4.hdr = {
		.next_proto_id = 0xff,
		.src_addr = RTE_BE32(0xffffffff),
		.dst_addr = RTE_BE32(0xffffffff),
	},
	.ipv6.hdr = {
		.proto = 0xff,
		.src_addr =
			"\xff\xff\xff\xff\xff\xff\xff\xff"
			"\xff\xff\xff\xff\xff\xff\xff\xff",
		.dst_addr =
			"\xff\xff\xff\xff\xff\xff\xff\xff"
			"\xff\xff\xff\xff\xff\xff\xff\xff",
	},
	.tcp.hdr = {
		.src_port = RTE_BE16(0xffff),
		.dst_port = RTE_BE16(0xffff),
	},
	.udp.hdr = {
		.src_port = RTE_BE16(0xffff),
		.dst_port = RTE_BE16(0xffff),
	},
};

/**
 * Retrieve mask for pattern item.
 *
 * This function does basic sanity checks on a pattern item in order to
 * return the most appropriate mask for it.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask_default
 *   Default mask for pattern item as specified by the flow API.
 * @param[in] mask_supported
 *   Mask fields supported by the implementation.
 * @param[in] mask_empty
 *   Empty mask to return when there is no specification.
 * @param[in] mask_size
 *   Size in bytes of the masks pointed to by the above parameters.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   Either @p item->mask or one of the mask parameters on success, NULL
 *   otherwise and rte_errno is set.
 */
static const void *
mlx5_nl_flow_item_mask(const struct rte_flow_item *item,
		       const void *mask_default,
		       const void *mask_supported,
		       const void *mask_empty,
		       size_t mask_size,
		       struct rte_flow_error *error)
{
	const uint8_t *mask;
	size_t i;

	/* item->last and item->mask cannot exist without item->spec. */
	if (!item->spec && (item->mask || item->last)) {
		rte_flow_error_set
			(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
			 "\"mask\" or \"last\" field provided without a"
			 " corresponding \"spec\"");
		return NULL;
	}
	/* No spec, no mask, no problem. */
	if (!item->spec)
		return mask_empty;
	mask = item->mask ? item->mask : mask_default;
	assert(mask);
	/*
	 * Single-pass check to make sure that:
	 * - Mask is supported, no bits are set outside mask_supported.
	 * - Both item->spec and item->last are included in mask.
	 */
	for (i = 0; i != mask_size; ++i) {
		if (!mask[i])
			continue;
		if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
		    ((const uint8_t *)mask_supported)[i]) {
			rte_flow_error_set
				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
				 mask, "unsupported field found in \"mask\"");
			return NULL;
		}
		if (item->last &&
		    (((const uint8_t *)item->spec)[i] & mask[i]) !=
		    (((const uint8_t *)item->last)[i] & mask[i])) {
			rte_flow_error_set
				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_LAST,
				 item->last,
				 "range between \"spec\" and \"last\" not"
				 " comprised in \"mask\"");
			return NULL;
		}
	}
	return mask;
}
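
/*
 * For instance, given an item with .spec set and .mask left NULL, the
 * helper above returns mask_default (the flow API default); with .spec
 * NULL (and no .mask or .last) it returns mask_empty; an explicit
 * item->mask is returned as-is once validated against mask_supported.
 */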

/**
 * Transpose flow rule description to rtnetlink message.
 *
 * This function transposes a flow rule description to a traffic control
 * (TC) filter creation message ready to be sent over Netlink.
 *
 * Target interface is specified as the first entry of the @p ptoi table.
 * Subsequent entries enable this function to resolve other DPDK port IDs
 * found in the flow rule.
 *
 * @param[out] buf
 *   Output message buffer. May be NULL when @p size is 0.
 * @param size
 *   Size of @p buf. Message may be truncated if not large enough.
 * @param[in] ptoi
 *   DPDK port ID to network interface index translation table. This table
 *   is terminated by an entry with a zero ifindex value.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification.
 * @param[in] actions
 *   Associated actions.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A positive value representing the exact size of the message in bytes
 *   regardless of the @p size parameter on success, a negative errno value
 *   otherwise and rte_errno is set.
 */
int
mlx5_nl_flow_transpose(void *buf,
		       size_t size,
		       const struct mlx5_nl_flow_ptoi *ptoi,
		       const struct rte_flow_attr *attr,
		       const struct rte_flow_item *pattern,
		       const struct rte_flow_action *actions,
		       struct rte_flow_error *error)
{
	alignas(struct nlmsghdr)
	uint8_t buf_tmp[mnl_nlmsg_size(sizeof(struct tcmsg) + 1024)];
	const struct rte_flow_item *item;
	const struct rte_flow_action *action;
	unsigned int n;
	uint32_t act_index_cur;
	bool in_port_id_set;
	bool eth_type_set;
	bool vlan_present;
	bool vlan_eth_type_set;
	bool ip_proto_set;
	struct nlattr *na_flower;
	struct nlattr *na_flower_act;
	struct nlattr *na_vlan_id;
	struct nlattr *na_vlan_priority;
	const enum mlx5_nl_flow_trans *trans;
	const enum mlx5_nl_flow_trans *back;

	if (!size)
		goto error_nobufs;
init:
	item = pattern;
	action = actions;
	n = 0;
	act_index_cur = 0;
	in_port_id_set = false;
	eth_type_set = false;
	vlan_present = false;
	vlan_eth_type_set = false;
	ip_proto_set = false;
	na_flower = NULL;
	na_flower_act = NULL;
	na_vlan_id = NULL;
	na_vlan_priority = NULL;
	trans = TRANS(ATTR);
	back = trans;
trans:
	switch (trans[n++]) {
	union {
		const struct rte_flow_item_port_id *port_id;
		const struct rte_flow_item_eth *eth;
		const struct rte_flow_item_vlan *vlan;
		const struct rte_flow_item_ipv4 *ipv4;
		const struct rte_flow_item_ipv6 *ipv6;
		const struct rte_flow_item_tcp *tcp;
		const struct rte_flow_item_udp *udp;
	} spec, mask;
	union {
		const struct rte_flow_action_port_id *port_id;
		const struct rte_flow_action_of_push_vlan *of_push_vlan;
		const struct rte_flow_action_of_set_vlan_vid *
			of_set_vlan_vid;
		const struct rte_flow_action_of_set_vlan_pcp *
			of_set_vlan_pcp;
	} conf;
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	struct nlattr *act_index;
	struct nlattr *act;
	unsigned int i;

	case INVALID:
		if (item->type)
			return rte_flow_error_set
				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
				 item, "unsupported pattern item combination");
		else if (action->type)
			return rte_flow_error_set
				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
				 action, "unsupported action combination");
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
			 "flow rule lacks some kind of fate action");
	case BACK:
		trans = back;
		n = 0;
		goto trans;
	case ATTR:
		/*
		 * Supported attributes: no groups, some priorities and
		 * ingress only. Don't care about transfer as it is the
		 * caller's problem.
		 */
		if (attr->group)
			return rte_flow_error_set
				(error, ENOTSUP,
				 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				 attr, "groups are not supported");
		if (attr->priority > 0xfffe)
			return rte_flow_error_set
				(error, ENOTSUP,
				 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				 attr, "lowest priority level is 0xfffe");
		if (!attr->ingress)
			return rte_flow_error_set
				(error, ENOTSUP,
				 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				 attr, "only ingress is supported");
		if (attr->egress)
			return rte_flow_error_set
				(error, ENOTSUP,
				 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				 attr, "egress is not supported");
		if (size < mnl_nlmsg_size(sizeof(*tcm)))
			goto error_nobufs;
		nlh = mnl_nlmsg_put_header(buf);
		nlh->nlmsg_type = 0;
		nlh->nlmsg_flags = 0;
		nlh->nlmsg_seq = 0;
		tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
		tcm->tcm_family = AF_UNSPEC;
		tcm->tcm_ifindex = ptoi[0].ifindex;
		/*
		 * Let kernel pick a handle by default. A predictable handle
		 * can be set by the caller on the resulting buffer through
		 * mlx5_nl_flow_brand().
		 */
		tcm->tcm_handle = 0;
		tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
		/*
		 * Priority cannot be zero to prevent the kernel from
		 * picking one automatically.
		 */
		tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
					  RTE_BE16(ETH_P_ALL));
		break;
	case PATTERN:
		if (!mnl_attr_put_strz_check(buf, size, TCA_KIND, "flower"))
			goto error_nobufs;
		na_flower = mnl_attr_nest_start_check(buf, size, TCA_OPTIONS);
		if (!na_flower)
			goto error_nobufs;
		if (!mnl_attr_put_u32_check(buf, size, TCA_FLOWER_FLAGS,
					    TCA_CLS_FLAGS_SKIP_SW))
			goto error_nobufs;
		break;
	case ITEM_VOID:
		if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
			goto trans;
		++item;
		break;
	case ITEM_PORT_ID:
		if (item->type != RTE_FLOW_ITEM_TYPE_PORT_ID)
			goto trans;
		mask.port_id = mlx5_nl_flow_item_mask
			(item, &rte_flow_item_port_id_mask,
			 &mlx5_nl_flow_mask_supported.port_id,
			 &mlx5_nl_flow_mask_empty.port_id,
			 sizeof(mlx5_nl_flow_mask_supported.port_id), error);
		if (!mask.port_id)
			return -rte_errno;
		if (mask.port_id == &mlx5_nl_flow_mask_empty.port_id) {
			in_port_id_set = 1;
			++item;
			break;
		}
		spec.port_id = item->spec;
		if (mask.port_id->id && mask.port_id->id != 0xffffffff)
			return rte_flow_error_set
				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
				 mask.port_id,
				 "no support for partial mask on"
				 " \"id\" field");
		if (!mask.port_id->id)
			i = 0;
		else
			for (i = 0; ptoi[i].ifindex; ++i)
				if (ptoi[i].port_id == spec.port_id->id)
					break;
		if (!ptoi[i].ifindex)
			return rte_flow_error_set
				(error, ENODEV, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
				 spec.port_id,
				 "missing data to convert port ID to ifindex");
		tcm = mnl_nlmsg_get_payload(buf);
		if (in_port_id_set &&
		    ptoi[i].ifindex != (unsigned int)tcm->tcm_ifindex)
			return rte_flow_error_set
				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
				 spec.port_id,
				 "cannot match traffic for several port IDs"
				 " through a single flow rule");
		tcm->tcm_ifindex = ptoi[i].ifindex;
		in_port_id_set = 1;
		++item;
		break;
	case ITEM_ETH:
		if (item->type != RTE_FLOW_ITEM_TYPE_ETH)
			goto trans;
		mask.eth = mlx5_nl_flow_item_mask
			(item, &rte_flow_item_eth_mask,
			 &mlx5_nl_flow_mask_supported.eth,
			 &mlx5_nl_flow_mask_empty.eth,
			 sizeof(mlx5_nl_flow_mask_supported.eth), error);
		if (!mask.eth)
			return -rte_errno;
		if (mask.eth == &mlx5_nl_flow_mask_empty.eth) {
			++item;
			break;
		}
		spec.eth = item->spec;
		if (mask.eth->type && mask.eth->type != RTE_BE16(0xffff))
			return rte_flow_error_set
				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
				 mask.eth,
				 "no support for partial mask on"
				 " \"type\" field");
		if (mask.eth->type) {
			if (!mnl_attr_put_u16_check(buf, size,
						    TCA_FLOWER_KEY_ETH_TYPE,
						    spec.eth->type))
				goto error_nobufs;
			eth_type_set = 1;
		}
		if ((!is_zero_ether_addr(&mask.eth->dst) &&
		     (!mnl_attr_put_check(buf, size,
					  TCA_FLOWER_KEY_ETH_DST,
					  ETHER_ADDR_LEN,
					  spec.eth->dst.addr_bytes) ||
		      !mnl_attr_put_check(buf, size,
					  TCA_FLOWER_KEY_ETH_DST_MASK,
					  ETHER_ADDR_LEN,
					  mask.eth->dst.addr_bytes))) ||
		    (!is_zero_ether_addr(&mask.eth->src) &&
		     (!mnl_attr_put_check(buf, size,
					  TCA_FLOWER_KEY_ETH_SRC,
					  ETHER_ADDR_LEN,
					  spec.eth->src.addr_bytes) ||
		      !mnl_attr_put_check(buf, size,
					  TCA_FLOWER_KEY_ETH_SRC_MASK,
					  ETHER_ADDR_LEN,
					  mask.eth->src.addr_bytes))))
			goto error_nobufs;
		++item;
		break;
	case ITEM_VLAN:
		if (item->type != RTE_FLOW_ITEM_TYPE_VLAN)
			goto trans;
		mask.vlan = mlx5_nl_flow_item_mask
			(item, &rte_flow_item_vlan_mask,
			 &mlx5_nl_flow_mask_supported.vlan,
			 &mlx5_nl_flow_mask_empty.vlan,
			 sizeof(mlx5_nl_flow_mask_supported.vlan), error);
		if (!mask.vlan)
			return -rte_errno;
		if (!eth_type_set &&
		    !mnl_attr_put_u16_check(buf, size,
					    TCA_FLOWER_KEY_ETH_TYPE,
					    RTE_BE16(ETH_P_8021Q)))
			goto error_nobufs;
		eth_type_set = 1;
		vlan_present = 1;
		if (mask.vlan == &mlx5_nl_flow_mask_empty.vlan) {
			++item;
			break;
		}
		spec.vlan = item->spec;
		if ((mask.vlan->tci & RTE_BE16(0xe000) &&
		     (mask.vlan->tci & RTE_BE16(0xe000)) != RTE_BE16(0xe000)) ||
		    (mask.vlan->tci & RTE_BE16(0x0fff) &&
		     (mask.vlan->tci & RTE_BE16(0x0fff)) != RTE_BE16(0x0fff)) ||
		    (mask.vlan->inner_type &&
		     mask.vlan->inner_type != RTE_BE16(0xffff)))
			return rte_flow_error_set
				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
				 mask.vlan,
				 "no support for partial masks on"
				 " \"tci\" (PCP and VID parts) and"
				 " \"inner_type\" fields");
		if (mask.vlan->inner_type) {
			if (!mnl_attr_put_u16_check
			    (buf, size, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
			     spec.vlan->inner_type))
				goto error_nobufs;
			vlan_eth_type_set = 1;
		}
		if ((mask.vlan->tci & RTE_BE16(0xe000) &&
		     !mnl_attr_put_u8_check
		     (buf, size, TCA_FLOWER_KEY_VLAN_PRIO,
		      (rte_be_to_cpu_16(spec.vlan->tci) >> 13) & 0x7)) ||
		    (mask.vlan->tci & RTE_BE16(0x0fff) &&
		     !mnl_attr_put_u16_check
		     (buf, size, TCA_FLOWER_KEY_VLAN_ID,
		      rte_be_to_cpu_16(spec.vlan->tci & RTE_BE16(0x0fff)))))
			goto error_nobufs;
		++item;
		break;
	case ITEM_IPV4:
		if (item->type != RTE_FLOW_ITEM_TYPE_IPV4)
			goto trans;
		mask.ipv4 = mlx5_nl_flow_item_mask
			(item, &rte_flow_item_ipv4_mask,
			 &mlx5_nl_flow_mask_supported.ipv4,
			 &mlx5_nl_flow_mask_empty.ipv4,
			 sizeof(mlx5_nl_flow_mask_supported.ipv4), error);
		if (!mask.ipv4)
			return -rte_errno;
		if ((!eth_type_set || !vlan_eth_type_set) &&
		    !mnl_attr_put_u16_check(buf, size,
					    vlan_present ?
					    TCA_FLOWER_KEY_VLAN_ETH_TYPE :
					    TCA_FLOWER_KEY_ETH_TYPE,
					    RTE_BE16(ETH_P_IP)))
			goto error_nobufs;
		eth_type_set = 1;
		vlan_eth_type_set = 1;
		if (mask.ipv4 == &mlx5_nl_flow_mask_empty.ipv4) {
			++item;
			break;
		}
		spec.ipv4 = item->spec;
		if (mask.ipv4->hdr.next_proto_id &&
		    mask.ipv4->hdr.next_proto_id != 0xff)
			return rte_flow_error_set
				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
				 mask.ipv4,
				 "no support for partial mask on"
				 " \"hdr.next_proto_id\" field");
		if (mask.ipv4->hdr.next_proto_id) {
			if (!mnl_attr_put_u8_check
			    (buf, size, TCA_FLOWER_KEY_IP_PROTO,
			     spec.ipv4->hdr.next_proto_id))
				goto error_nobufs;
			ip_proto_set = 1;
		}
		if ((mask.ipv4->hdr.src_addr &&
		     (!mnl_attr_put_u32_check(buf, size,
					      TCA_FLOWER_KEY_IPV4_SRC,
					      spec.ipv4->hdr.src_addr) ||
		      !mnl_attr_put_u32_check(buf, size,
					      TCA_FLOWER_KEY_IPV4_SRC_MASK,
					      mask.ipv4->hdr.src_addr))) ||
		    (mask.ipv4->hdr.dst_addr &&
		     (!mnl_attr_put_u32_check(buf, size,
					      TCA_FLOWER_KEY_IPV4_DST,
					      spec.ipv4->hdr.dst_addr) ||
		      !mnl_attr_put_u32_check(buf, size,
					      TCA_FLOWER_KEY_IPV4_DST_MASK,
					      mask.ipv4->hdr.dst_addr))))
			goto error_nobufs;
		++item;
		break;
	case ITEM_IPV6:
		if (item->type != RTE_FLOW_ITEM_TYPE_IPV6)
			goto trans;
		mask.ipv6 = mlx5_nl_flow_item_mask
			(item, &rte_flow_item_ipv6_mask,
			 &mlx5_nl_flow_mask_supported.ipv6,
			 &mlx5_nl_flow_mask_empty.ipv6,
			 sizeof(mlx5_nl_flow_mask_supported.ipv6), error);
		if (!mask.ipv6)
			return -rte_errno;
		if ((!eth_type_set || !vlan_eth_type_set) &&
		    !mnl_attr_put_u16_check(buf, size,
					    vlan_present ?
					    TCA_FLOWER_KEY_VLAN_ETH_TYPE :
					    TCA_FLOWER_KEY_ETH_TYPE,
					    RTE_BE16(ETH_P_IPV6)))
			goto error_nobufs;
		eth_type_set = 1;
		vlan_eth_type_set = 1;
		if (mask.ipv6 == &mlx5_nl_flow_mask_empty.ipv6) {
			++item;
			break;
		}
		spec.ipv6 = item->spec;
		if (mask.ipv6->hdr.proto && mask.ipv6->hdr.proto != 0xff)
			return rte_flow_error_set
				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
				 mask.ipv6,
				 "no support for partial mask on"
				 " \"hdr.proto\" field");
		if (mask.ipv6->hdr.proto) {
			if (!mnl_attr_put_u8_check
			    (buf, size, TCA_FLOWER_KEY_IP_PROTO,
			     spec.ipv6->hdr.proto))
				goto error_nobufs;
			ip_proto_set = 1;
		}
		if ((!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr) &&
		     (!mnl_attr_put_check(buf, size,
					  TCA_FLOWER_KEY_IPV6_SRC,
					  sizeof(spec.ipv6->hdr.src_addr),
					  spec.ipv6->hdr.src_addr) ||
		      !mnl_attr_put_check(buf, size,
					  TCA_FLOWER_KEY_IPV6_SRC_MASK,
					  sizeof(mask.ipv6->hdr.src_addr),
					  mask.ipv6->hdr.src_addr))) ||
		    (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr) &&
		     (!mnl_attr_put_check(buf, size,
					  TCA_FLOWER_KEY_IPV6_DST,
					  sizeof(spec.ipv6->hdr.dst_addr),
					  spec.ipv6->hdr.dst_addr) ||
		      !mnl_attr_put_check(buf, size,
					  TCA_FLOWER_KEY_IPV6_DST_MASK,
					  sizeof(mask.ipv6->hdr.dst_addr),
					  mask.ipv6->hdr.dst_addr))))
			goto error_nobufs;
		++item;
		break;
	case ITEM_TCP:
		if (item->type != RTE_FLOW_ITEM_TYPE_TCP)
			goto trans;
		mask.tcp = mlx5_nl_flow_item_mask
			(item, &rte_flow_item_tcp_mask,
			 &mlx5_nl_flow_mask_supported.tcp,
			 &mlx5_nl_flow_mask_empty.tcp,
			 sizeof(mlx5_nl_flow_mask_supported.tcp), error);
		if (!mask.tcp)
			return -rte_errno;
		if (!ip_proto_set &&
		    !mnl_attr_put_u8_check(buf, size,
					   TCA_FLOWER_KEY_IP_PROTO,
					   IPPROTO_TCP))
			goto error_nobufs;
		if (mask.tcp == &mlx5_nl_flow_mask_empty.tcp) {
			++item;
			break;
		}
		spec.tcp = item->spec;
		if ((mask.tcp->hdr.src_port &&
		     mask.tcp->hdr.src_port != RTE_BE16(0xffff)) ||
		    (mask.tcp->hdr.dst_port &&
		     mask.tcp->hdr.dst_port != RTE_BE16(0xffff)))
			return rte_flow_error_set
				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
				 mask.tcp,
				 "no support for partial masks on"
				 " \"hdr.src_port\" and \"hdr.dst_port\""
				 " fields");
		if ((mask.tcp->hdr.src_port &&
		     (!mnl_attr_put_u16_check(buf, size,
					      TCA_FLOWER_KEY_TCP_SRC,
					      spec.tcp->hdr.src_port) ||
		      !mnl_attr_put_u16_check(buf, size,
					      TCA_FLOWER_KEY_TCP_SRC_MASK,
					      mask.tcp->hdr.src_port))) ||
		    (mask.tcp->hdr.dst_port &&
		     (!mnl_attr_put_u16_check(buf, size,
					      TCA_FLOWER_KEY_TCP_DST,
					      spec.tcp->hdr.dst_port) ||
		      !mnl_attr_put_u16_check(buf, size,
					      TCA_FLOWER_KEY_TCP_DST_MASK,
					      mask.tcp->hdr.dst_port))))
			goto error_nobufs;
		++item;
		break;
	case ITEM_UDP:
		if (item->type != RTE_FLOW_ITEM_TYPE_UDP)
			goto trans;
		mask.udp = mlx5_nl_flow_item_mask
			(item, &rte_flow_item_udp_mask,
			 &mlx5_nl_flow_mask_supported.udp,
			 &mlx5_nl_flow_mask_empty.udp,
			 sizeof(mlx5_nl_flow_mask_supported.udp), error);
		if (!mask.udp)
			return -rte_errno;
		if (!ip_proto_set &&
		    !mnl_attr_put_u8_check(buf, size,
					   TCA_FLOWER_KEY_IP_PROTO,
					   IPPROTO_UDP))
			goto error_nobufs;
		if (mask.udp == &mlx5_nl_flow_mask_empty.udp) {
			++item;
			break;
		}
		spec.udp = item->spec;
		if ((mask.udp->hdr.src_port &&
		     mask.udp->hdr.src_port != RTE_BE16(0xffff)) ||
		    (mask.udp->hdr.dst_port &&
		     mask.udp->hdr.dst_port != RTE_BE16(0xffff)))
			return rte_flow_error_set
				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
				 mask.udp,
				 "no support for partial masks on"
				 " \"hdr.src_port\" and \"hdr.dst_port\""
				 " fields");
		if ((mask.udp->hdr.src_port &&
		     (!mnl_attr_put_u16_check(buf, size,
					      TCA_FLOWER_KEY_UDP_SRC,
					      spec.udp->hdr.src_port) ||
		      !mnl_attr_put_u16_check(buf, size,
					      TCA_FLOWER_KEY_UDP_SRC_MASK,
					      mask.udp->hdr.src_port))) ||
		    (mask.udp->hdr.dst_port &&
		     (!mnl_attr_put_u16_check(buf, size,
					      TCA_FLOWER_KEY_UDP_DST,
					      spec.udp->hdr.dst_port) ||
		      !mnl_attr_put_u16_check(buf, size,
					      TCA_FLOWER_KEY_UDP_DST_MASK,
					      mask.udp->hdr.dst_port))))
			goto error_nobufs;
		++item;
		break;
	case ACTIONS:
		if (item->type != RTE_FLOW_ITEM_TYPE_END)
			goto trans;
		assert(na_flower);
		assert(!na_flower_act);
		na_flower_act =
			mnl_attr_nest_start_check(buf, size, TCA_FLOWER_ACT);
		if (!na_flower_act)
			goto error_nobufs;
		act_index_cur = 1;
		break;
	case ACTION_VOID:
		if (action->type != RTE_FLOW_ACTION_TYPE_VOID)
			goto trans;
		++action;
		break;
	case ACTION_PORT_ID:
		if (action->type != RTE_FLOW_ACTION_TYPE_PORT_ID)
			goto trans;
		conf.port_id = action->conf;
		if (conf.port_id->original)
			i = 0;
		else
			for (i = 0; ptoi[i].ifindex; ++i)
				if (ptoi[i].port_id == conf.port_id->id)
					break;
		if (!ptoi[i].ifindex)
			return rte_flow_error_set
				(error, ENODEV, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
				 conf.port_id,
				 "missing data to convert port ID to ifindex");
		act_index =
			mnl_attr_nest_start_check(buf, size, act_index_cur++);
		if (!act_index ||
		    !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "mirred"))
			goto error_nobufs;
		act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
		if (!act)
			goto error_nobufs;
		if (!mnl_attr_put_check(buf, size, TCA_MIRRED_PARMS,
					sizeof(struct tc_mirred),
					&(struct tc_mirred){
						.action = TC_ACT_STOLEN,
						.eaction = TCA_EGRESS_REDIR,
						.ifindex = ptoi[i].ifindex,
					}))
			goto error_nobufs;
		mnl_attr_nest_end(buf, act);
		mnl_attr_nest_end(buf, act_index);
		++action;
		break;
	case ACTION_DROP:
		if (action->type != RTE_FLOW_ACTION_TYPE_DROP)
			goto trans;
		act_index =
			mnl_attr_nest_start_check(buf, size, act_index_cur++);
		if (!act_index ||
		    !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "gact"))
			goto error_nobufs;
		act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
		if (!act)
			goto error_nobufs;
		if (!mnl_attr_put_check(buf, size, TCA_GACT_PARMS,
					sizeof(struct tc_gact),
					&(struct tc_gact){
						.action = TC_ACT_SHOT,
					}))
			goto error_nobufs;
		mnl_attr_nest_end(buf, act);
		mnl_attr_nest_end(buf, act_index);
		++action;
		break;
	case ACTION_OF_POP_VLAN:
		if (action->type != RTE_FLOW_ACTION_TYPE_OF_POP_VLAN)
			goto trans;
		conf.of_push_vlan = NULL;
		i = TCA_VLAN_ACT_POP;
		goto action_of_vlan;
	case ACTION_OF_PUSH_VLAN:
		if (action->type != RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN)
			goto trans;
		conf.of_push_vlan = action->conf;
		i = TCA_VLAN_ACT_PUSH;
		goto action_of_vlan;
	case ACTION_OF_SET_VLAN_VID:
		if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID)
			goto trans;
		conf.of_set_vlan_vid = action->conf;
		if (na_vlan_id)
			goto override_na_vlan_id;
		i = TCA_VLAN_ACT_MODIFY;
		goto action_of_vlan;
	case ACTION_OF_SET_VLAN_PCP:
		if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP)
			goto trans;
		conf.of_set_vlan_pcp = action->conf;
		if (na_vlan_priority)
			goto override_na_vlan_priority;
		i = TCA_VLAN_ACT_MODIFY;
		goto action_of_vlan;
action_of_vlan:
		act_index =
			mnl_attr_nest_start_check(buf, size, act_index_cur++);
		if (!act_index ||
		    !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "vlan"))
			goto error_nobufs;
		act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
		if (!act)
			goto error_nobufs;
		if (!mnl_attr_put_check(buf, size, TCA_VLAN_PARMS,
					sizeof(struct tc_vlan),
					&(struct tc_vlan){
						.action = TC_ACT_PIPE,
						.v_action = i,
					}))
			goto error_nobufs;
		if (i == TCA_VLAN_ACT_POP) {
			mnl_attr_nest_end(buf, act);
			mnl_attr_nest_end(buf, act_index);
			++action;
			break;
		}
		if (i == TCA_VLAN_ACT_PUSH &&
		    !mnl_attr_put_u16_check(buf, size,
					    TCA_VLAN_PUSH_VLAN_PROTOCOL,
					    conf.of_push_vlan->ethertype))
			goto error_nobufs;
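		/*
		 * The two TCA_VLAN_PAD attributes below are placeholders:
		 * their addresses are saved in na_vlan_id and
		 * na_vlan_priority so that later OF_SET_VLAN_VID and
		 * OF_SET_VLAN_PCP actions can re-type and fill them in
		 * place (see the override_na_vlan_* labels) instead of
		 * emitting a separate VLAN action for each change.
		 */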
		na_vlan_id = mnl_nlmsg_get_payload_tail(buf);
		if (!mnl_attr_put_u16_check(buf, size, TCA_VLAN_PAD, 0))
			goto error_nobufs;
		na_vlan_priority = mnl_nlmsg_get_payload_tail(buf);
		if (!mnl_attr_put_u8_check(buf, size, TCA_VLAN_PAD, 0))
			goto error_nobufs;
		mnl_attr_nest_end(buf, act);
		mnl_attr_nest_end(buf, act_index);
		if (action->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
override_na_vlan_id:
			na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
			*(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
				rte_be_to_cpu_16
				(conf.of_set_vlan_vid->vlan_vid);
		} else if (action->type ==
			   RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
override_na_vlan_priority:
			na_vlan_priority->nla_type =
				TCA_VLAN_PUSH_VLAN_PRIORITY;
			*(uint8_t *)mnl_attr_get_payload(na_vlan_priority) =
				conf.of_set_vlan_pcp->vlan_pcp;
		}
		++action;
		break;
	case END:
		if (item->type != RTE_FLOW_ITEM_TYPE_END ||
		    action->type != RTE_FLOW_ACTION_TYPE_END)
			goto trans;
		if (na_flower_act)
			mnl_attr_nest_end(buf, na_flower_act);
		if (na_flower)
			mnl_attr_nest_end(buf, na_flower);
		nlh = buf;
		return nlh->nlmsg_len;
	}
	back = trans;
	trans = mlx5_nl_flow_trans[trans[n - 1]];
	n = 0;
	goto trans;
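	/*
	 * No more room in the provided buffer: restart the translation
	 * from scratch on buf_tmp, which is large enough for any message
	 * this function can generate, so the exact required size can
	 * still be returned to the caller.
	 */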
error_nobufs:
	if (buf != buf_tmp) {
		buf = buf_tmp;
		size = sizeof(buf_tmp);
		goto init;
	}
	return rte_flow_error_set
		(error, ENOBUFS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
		 "generated TC message is too large");
}
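
/*
 * Illustrative sketch, not part of the original file: a caller can query
 * the exact message size first by passing a NULL buffer, then translate
 * for real. The ptoi table and the rule description (attr, pattern,
 * actions, error) are assumed to be defined elsewhere:
 *
 *  int len = mlx5_nl_flow_transpose(NULL, 0, ptoi, attr, pattern,
 *                                   actions, &error);
 *  if (len > 0) {
 *      void *msg = malloc(len);
 *
 *      if (msg)
 *          len = mlx5_nl_flow_transpose(msg, len, ptoi, attr, pattern,
 *                                       actions, &error);
 *  }
 */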

/**
 * Brand rtnetlink buffer with unique handle.
 *
 * This handle should be unique for a given network interface to avoid
 * collisions.
 *
 * @param buf
 *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
 * @param handle
 *   Unique 32-bit handle to use.
 */
void
mlx5_nl_flow_brand(void *buf, uint32_t handle)
{
	struct tcmsg *tcm = mnl_nlmsg_get_payload(buf);

	tcm->tcm_handle = handle;
}

/**
 * Send Netlink message with acknowledgment.
 *
 * @param nl
 *   Libmnl socket to use.
 * @param nlh
 *   Message to send. This function always raises the NLM_F_ACK flag before
 *   sending.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_nl_flow_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
{
	alignas(struct nlmsghdr)
	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
		    nlh->nlmsg_len - sizeof(*nlh)];
	uint32_t seq = random();
	int ret;

	nlh->nlmsg_flags |= NLM_F_ACK;
	nlh->nlmsg_seq = seq;
	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
	if (ret != -1)
		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
	if (ret != -1)
		ret = mnl_cb_run
			(ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
	if (!ret)
		return 0;
	rte_errno = errno;
	return -rte_errno;
}

/**
 * Create a Netlink flow rule.
 *
 * @param nl
 *   Libmnl socket to use.
 * @param buf
 *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
		    struct rte_flow_error *error)
{
	struct nlmsghdr *nlh = buf;

	nlh->nlmsg_type = RTM_NEWTFILTER;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	if (!mlx5_nl_flow_nl_ack(nl, nlh))
		return 0;
	return rte_flow_error_set
		(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
		 "netlink: failed to create TC flow rule");
}

/**
 * Destroy a Netlink flow rule.
 *
 * @param nl
 *   Libmnl socket to use.
 * @param buf
 *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
		     struct rte_flow_error *error)
{
	struct nlmsghdr *nlh = buf;

	nlh->nlmsg_type = RTM_DELTFILTER;
	nlh->nlmsg_flags = NLM_F_REQUEST;
	if (!mlx5_nl_flow_nl_ack(nl, nlh))
		return 0;
	return rte_flow_error_set
		(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
		 "netlink: failed to destroy TC flow rule");
}

/**
 * Initialize ingress qdisc of a given network interface.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind.
 * @param ifindex
 *   Index of network interface to initialize.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
		  struct rte_flow_error *error)
{
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	alignas(struct nlmsghdr)
	uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];

	/* Destroy existing ingress qdisc and everything attached to it. */
	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_DELQDISC;
	nlh->nlmsg_flags = NLM_F_REQUEST;
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ifindex;
	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
	tcm->tcm_parent = TC_H_INGRESS;
	/* Ignore errors when qdisc is already absent. */
	if (mlx5_nl_flow_nl_ack(nl, nlh) &&
	    rte_errno != EINVAL && rte_errno != ENOENT)
		return rte_flow_error_set
			(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			 NULL, "netlink: failed to remove ingress qdisc");
	/* Create fresh ingress qdisc. */
	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_NEWQDISC;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ifindex;
	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
	tcm->tcm_parent = TC_H_INGRESS;
	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
	if (mlx5_nl_flow_nl_ack(nl, nlh))
		return rte_flow_error_set
			(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			 NULL, "netlink: failed to create ingress qdisc");
	return 0;
}
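
/*
 * Illustrative note: the two requests issued by mlx5_nl_flow_init() are
 * roughly equivalent to running the following commands for a hypothetical
 * interface "ethX":
 *
 *  tc qdisc del dev ethX ingress
 *  tc qdisc add dev ethX ingress
 */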

/**
 * Create and configure a libmnl socket for Netlink flow rules.
 *
 * @return
 *   A valid libmnl socket object pointer on success, NULL otherwise and
 *   rte_errno is set.
 */
struct mnl_socket *
mlx5_nl_flow_socket_create(void)
{
	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);

	if (nl) {
		mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
				      sizeof(int));
		if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
			return nl;
	}
	rte_errno = errno;
	if (nl)
		mnl_socket_close(nl);
	return NULL;
}

/**
 * Destroy a libmnl socket.
 */
void
mlx5_nl_flow_socket_destroy(struct mnl_socket *nl)
{
	mnl_socket_close(nl);
}
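
/*
 * Illustrative sketch, not part of the original file: intended lifecycle
 * of the helpers above, with error handling omitted. The ptoi table, the
 * rule description (attr, pattern, actions), the message buffer and the
 * "handle" value are assumed to be provided by the caller:
 *
 *  struct mnl_socket *nl = mlx5_nl_flow_socket_create();
 *
 *  mlx5_nl_flow_init(nl, ptoi[0].ifindex, &error);
 *  mlx5_nl_flow_transpose(buf, size, ptoi, &attr, pattern, actions, &error);
 *  mlx5_nl_flow_brand(buf, handle);
 *  mlx5_nl_flow_create(nl, buf, &error);
 *  ...
 *  mlx5_nl_flow_destroy(nl, buf, &error);
 *  mlx5_nl_flow_socket_destroy(nl);
 */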