/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2018 6WIND S.A.
 * Copyright 2018 Mellanox Technologies, Ltd
 */
#include <assert.h>
#include <errno.h>
#include <libmnl/libmnl.h>
#include <linux/if_ether.h>
#include <linux/netlink.h>
#include <linux/pkt_cls.h>
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/tc_act/tc_gact.h>
#include <linux/tc_act/tc_mirred.h>
#include <netinet/in.h>
#include <stdalign.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/socket.h>

#include <rte_byteorder.h>
#include <rte_errno.h>
#include <rte_ether.h>
#include <rte_flow.h>

#include "mlx5_autoconf.h"
#ifdef HAVE_TC_ACT_VLAN

#include <linux/tc_act/tc_vlan.h>

#else /* HAVE_TC_ACT_VLAN */

/* Fallback definitions when linux/tc_act/tc_vlan.h is unavailable. */
#define TCA_VLAN_ACT_POP 1
#define TCA_VLAN_ACT_PUSH 2
#define TCA_VLAN_ACT_MODIFY 3
#define TCA_VLAN_PARMS 2
#define TCA_VLAN_PUSH_VLAN_ID 3
#define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
#define TCA_VLAN_PAD 5
#define TCA_VLAN_PUSH_VLAN_PRIORITY 6

/* NOTE(review): reconstructed from kernel uapi; tc_gen comes from
 * linux/pkt_cls.h — confirm against the target kernel headers. */
struct tc_vlan {
	tc_gen;
	__u16 push_vlan_id;
	__be16 push_vlan_protocol;
	__u8 push_vlan_prio;
};

#endif /* HAVE_TC_ACT_VLAN */

/* Normally found in linux/netlink.h. */
#ifndef NETLINK_CAP_ACK
#define NETLINK_CAP_ACK 10
#endif

/* Normally found in linux/pkt_sched.h. */
#ifndef TC_H_MIN_INGRESS
#define TC_H_MIN_INGRESS 0xfff2u
#endif

/* Normally found in linux/pkt_cls.h. */
#ifndef TCA_CLS_FLAGS_SKIP_SW
#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
#endif
#ifndef HAVE_TCA_FLOWER_ACT
#define TCA_FLOWER_ACT 3
#endif
#ifndef HAVE_TCA_FLOWER_FLAGS
#define TCA_FLOWER_FLAGS 22
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
#define TCA_FLOWER_KEY_ETH_TYPE 8
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
#define TCA_FLOWER_KEY_ETH_DST 4
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
#define TCA_FLOWER_KEY_ETH_DST_MASK 5
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
#define TCA_FLOWER_KEY_ETH_SRC 6
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
#define TCA_FLOWER_KEY_ETH_SRC_MASK 7
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
#define TCA_FLOWER_KEY_IP_PROTO 9
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
#define TCA_FLOWER_KEY_IPV4_SRC 10
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
#define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
#define TCA_FLOWER_KEY_IPV4_DST 12
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
#define TCA_FLOWER_KEY_IPV4_DST_MASK 13
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
#define TCA_FLOWER_KEY_IPV6_SRC 14
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
#define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
#define TCA_FLOWER_KEY_IPV6_DST 16
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
#define TCA_FLOWER_KEY_IPV6_DST_MASK 17
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
#define TCA_FLOWER_KEY_TCP_SRC 18
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
#define TCA_FLOWER_KEY_TCP_SRC_MASK 35
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
#define TCA_FLOWER_KEY_TCP_DST 19
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
#define TCA_FLOWER_KEY_TCP_DST_MASK 36
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
#define TCA_FLOWER_KEY_UDP_SRC 20
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
#define TCA_FLOWER_KEY_UDP_SRC_MASK 37
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
#define TCA_FLOWER_KEY_UDP_DST 21
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
#define TCA_FLOWER_KEY_UDP_DST_MASK 38
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
#define TCA_FLOWER_KEY_VLAN_ID 23
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
#define TCA_FLOWER_KEY_VLAN_PRIO 24
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
#endif
/** Parser state definitions for mlx5_nl_flow_trans[]. */
enum mlx5_nl_flow_trans {
	INVALID, /* list terminator inserted by TRANS(). */
	BACK,    /* sentinel: resume previous state list. */
	ATTR,
	PATTERN,
	ITEM_VOID,
	ITEM_PORT_ID,
	ITEM_ETH,
	ITEM_VLAN,
	ITEM_IPV4,
	ITEM_IPV6,
	ITEM_TCP,
	ITEM_UDP,
	ACTIONS,
	ACTION_VOID,
	ACTION_PORT_ID,
	ACTION_DROP,
	ACTION_OF_POP_VLAN,
	ACTION_OF_PUSH_VLAN,
	ACTION_OF_SET_VLAN_VID,
	ACTION_OF_SET_VLAN_PCP,
	END,
};
/* Expand to a compound-literal state list terminated by INVALID. */
#define TRANS(...) (const enum mlx5_nl_flow_trans []){ __VA_ARGS__, INVALID, }

/* States always reachable while parsing a pattern. */
#define PATTERN_COMMON \
	ITEM_VOID, ITEM_PORT_ID, ACTIONS
/* Non-fate actions allowed in any action list. */
#define ACTIONS_COMMON \
	ACTION_VOID, ACTION_OF_POP_VLAN, ACTION_OF_PUSH_VLAN, \
	ACTION_OF_SET_VLAN_VID, ACTION_OF_SET_VLAN_PCP
/* Terminating (fate) actions; exactly one is mandatory. */
#define ACTIONS_FATE \
	ACTION_PORT_ID, ACTION_DROP
185 /** Parser state transitions used by mlx5_nl_flow_transpose(). */
186 static const enum mlx5_nl_flow_trans
*const mlx5_nl_flow_trans
[] = {
189 [ATTR
] = TRANS(PATTERN
),
190 [PATTERN
] = TRANS(ITEM_ETH
, PATTERN_COMMON
),
191 [ITEM_VOID
] = TRANS(BACK
),
192 [ITEM_PORT_ID
] = TRANS(BACK
),
193 [ITEM_ETH
] = TRANS(ITEM_IPV4
, ITEM_IPV6
, ITEM_VLAN
, PATTERN_COMMON
),
194 [ITEM_VLAN
] = TRANS(ITEM_IPV4
, ITEM_IPV6
, PATTERN_COMMON
),
195 [ITEM_IPV4
] = TRANS(ITEM_TCP
, ITEM_UDP
, PATTERN_COMMON
),
196 [ITEM_IPV6
] = TRANS(ITEM_TCP
, ITEM_UDP
, PATTERN_COMMON
),
197 [ITEM_TCP
] = TRANS(PATTERN_COMMON
),
198 [ITEM_UDP
] = TRANS(PATTERN_COMMON
),
199 [ACTIONS
] = TRANS(ACTIONS_FATE
, ACTIONS_COMMON
),
200 [ACTION_VOID
] = TRANS(BACK
),
201 [ACTION_PORT_ID
] = TRANS(ACTION_VOID
, END
),
202 [ACTION_DROP
] = TRANS(ACTION_VOID
, END
),
203 [ACTION_OF_POP_VLAN
] = TRANS(ACTIONS_FATE
, ACTIONS_COMMON
),
204 [ACTION_OF_PUSH_VLAN
] = TRANS(ACTIONS_FATE
, ACTIONS_COMMON
),
205 [ACTION_OF_SET_VLAN_VID
] = TRANS(ACTIONS_FATE
, ACTIONS_COMMON
),
206 [ACTION_OF_SET_VLAN_PCP
] = TRANS(ACTIONS_FATE
, ACTIONS_COMMON
),
210 /** Empty masks for known item types. */
212 struct rte_flow_item_port_id port_id
;
213 struct rte_flow_item_eth eth
;
214 struct rte_flow_item_vlan vlan
;
215 struct rte_flow_item_ipv4 ipv4
;
216 struct rte_flow_item_ipv6 ipv6
;
217 struct rte_flow_item_tcp tcp
;
218 struct rte_flow_item_udp udp
;
219 } mlx5_nl_flow_mask_empty
;
221 /** Supported masks for known item types. */
222 static const struct {
223 struct rte_flow_item_port_id port_id
;
224 struct rte_flow_item_eth eth
;
225 struct rte_flow_item_vlan vlan
;
226 struct rte_flow_item_ipv4 ipv4
;
227 struct rte_flow_item_ipv6 ipv6
;
228 struct rte_flow_item_tcp tcp
;
229 struct rte_flow_item_udp udp
;
230 } mlx5_nl_flow_mask_supported
= {
235 .type
= RTE_BE16(0xffff),
236 .dst
.addr_bytes
= "\xff\xff\xff\xff\xff\xff",
237 .src
.addr_bytes
= "\xff\xff\xff\xff\xff\xff",
240 /* PCP and VID only, no DEI. */
241 .tci
= RTE_BE16(0xefff),
242 .inner_type
= RTE_BE16(0xffff),
245 .next_proto_id
= 0xff,
246 .src_addr
= RTE_BE32(0xffffffff),
247 .dst_addr
= RTE_BE32(0xffffffff),
252 "\xff\xff\xff\xff\xff\xff\xff\xff"
253 "\xff\xff\xff\xff\xff\xff\xff\xff",
255 "\xff\xff\xff\xff\xff\xff\xff\xff"
256 "\xff\xff\xff\xff\xff\xff\xff\xff",
259 .src_port
= RTE_BE16(0xffff),
260 .dst_port
= RTE_BE16(0xffff),
263 .src_port
= RTE_BE16(0xffff),
264 .dst_port
= RTE_BE16(0xffff),
269 * Retrieve mask for pattern item.
271 * This function does basic sanity checks on a pattern item in order to
272 * return the most appropriate mask for it.
275 * Item specification.
276 * @param[in] mask_default
277 * Default mask for pattern item as specified by the flow API.
278 * @param[in] mask_supported
279 * Mask fields supported by the implementation.
280 * @param[in] mask_empty
281 * Empty mask to return when there is no specification.
283 * Perform verbose error reporting if not NULL.
286 * Either @p item->mask or one of the mask parameters on success, NULL
287 * otherwise and rte_errno is set.
290 mlx5_nl_flow_item_mask(const struct rte_flow_item
*item
,
291 const void *mask_default
,
292 const void *mask_supported
,
293 const void *mask_empty
,
295 struct rte_flow_error
*error
)
300 /* item->last and item->mask cannot exist without item->spec. */
301 if (!item
->spec
&& (item
->mask
|| item
->last
)) {
303 (error
, EINVAL
, RTE_FLOW_ERROR_TYPE_ITEM
, item
,
304 "\"mask\" or \"last\" field provided without a"
305 " corresponding \"spec\"");
308 /* No spec, no mask, no problem. */
311 mask
= item
->mask
? item
->mask
: mask_default
;
314 * Single-pass check to make sure that:
315 * - Mask is supported, no bits are set outside mask_supported.
316 * - Both item->spec and item->last are included in mask.
318 for (i
= 0; i
!= mask_size
; ++i
) {
321 if ((mask
[i
] | ((const uint8_t *)mask_supported
)[i
]) !=
322 ((const uint8_t *)mask_supported
)[i
]) {
324 (error
, ENOTSUP
, RTE_FLOW_ERROR_TYPE_ITEM_MASK
,
325 mask
, "unsupported field found in \"mask\"");
329 (((const uint8_t *)item
->spec
)[i
] & mask
[i
]) !=
330 (((const uint8_t *)item
->last
)[i
] & mask
[i
])) {
332 (error
, ENOTSUP
, RTE_FLOW_ERROR_TYPE_ITEM_LAST
,
334 "range between \"spec\" and \"last\" not"
335 " comprised in \"mask\"");
343 * Transpose flow rule description to rtnetlink message.
345 * This function transposes a flow rule description to a traffic control
346 * (TC) filter creation message ready to be sent over Netlink.
348 * Target interface is specified as the first entry of the @p ptoi table.
349 * Subsequent entries enable this function to resolve other DPDK port IDs
350 * found in the flow rule.
353 * Output message buffer. May be NULL when @p size is 0.
355 * Size of @p buf. Message may be truncated if not large enough.
357 * DPDK port ID to network interface index translation table. This table
358 * is terminated by an entry with a zero ifindex value.
360 * Flow rule attributes.
362 * Pattern specification.
364 * Associated actions.
366 * Perform verbose error reporting if not NULL.
369 * A positive value representing the exact size of the message in bytes
370 * regardless of the @p size parameter on success, a negative errno value
371 * otherwise and rte_errno is set.
374 mlx5_nl_flow_transpose(void *buf
,
376 const struct mlx5_nl_flow_ptoi
*ptoi
,
377 const struct rte_flow_attr
*attr
,
378 const struct rte_flow_item
*pattern
,
379 const struct rte_flow_action
*actions
,
380 struct rte_flow_error
*error
)
382 alignas(struct nlmsghdr
)
383 uint8_t buf_tmp
[mnl_nlmsg_size(sizeof(struct tcmsg
) + 1024)];
384 const struct rte_flow_item
*item
;
385 const struct rte_flow_action
*action
;
387 uint32_t act_index_cur
;
391 bool vlan_eth_type_set
;
393 struct nlattr
*na_flower
;
394 struct nlattr
*na_flower_act
;
395 struct nlattr
*na_vlan_id
;
396 struct nlattr
*na_vlan_priority
;
397 const enum mlx5_nl_flow_trans
*trans
;
398 const enum mlx5_nl_flow_trans
*back
;
407 in_port_id_set
= false;
408 eth_type_set
= false;
409 vlan_present
= false;
410 vlan_eth_type_set
= false;
411 ip_proto_set
= false;
413 na_flower_act
= NULL
;
415 na_vlan_priority
= NULL
;
419 switch (trans
[n
++]) {
421 const struct rte_flow_item_port_id
*port_id
;
422 const struct rte_flow_item_eth
*eth
;
423 const struct rte_flow_item_vlan
*vlan
;
424 const struct rte_flow_item_ipv4
*ipv4
;
425 const struct rte_flow_item_ipv6
*ipv6
;
426 const struct rte_flow_item_tcp
*tcp
;
427 const struct rte_flow_item_udp
*udp
;
430 const struct rte_flow_action_port_id
*port_id
;
431 const struct rte_flow_action_of_push_vlan
*of_push_vlan
;
432 const struct rte_flow_action_of_set_vlan_vid
*
434 const struct rte_flow_action_of_set_vlan_pcp
*
437 struct nlmsghdr
*nlh
;
439 struct nlattr
*act_index
;
445 return rte_flow_error_set
446 (error
, ENOTSUP
, RTE_FLOW_ERROR_TYPE_ITEM
,
447 item
, "unsupported pattern item combination");
448 else if (action
->type
)
449 return rte_flow_error_set
450 (error
, ENOTSUP
, RTE_FLOW_ERROR_TYPE_ACTION
,
451 action
, "unsupported action combination");
452 return rte_flow_error_set
453 (error
, ENOTSUP
, RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
454 "flow rule lacks some kind of fate action");
461 * Supported attributes: no groups, some priorities and
462 * ingress only. Don't care about transfer as it is the
466 return rte_flow_error_set
468 RTE_FLOW_ERROR_TYPE_ATTR_GROUP
,
469 attr
, "groups are not supported");
470 if (attr
->priority
> 0xfffe)
471 return rte_flow_error_set
473 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY
,
474 attr
, "lowest priority level is 0xfffe");
476 return rte_flow_error_set
478 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS
,
479 attr
, "only ingress is supported");
481 return rte_flow_error_set
483 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS
,
484 attr
, "egress is not supported");
485 if (size
< mnl_nlmsg_size(sizeof(*tcm
)))
487 nlh
= mnl_nlmsg_put_header(buf
);
489 nlh
->nlmsg_flags
= 0;
491 tcm
= mnl_nlmsg_put_extra_header(nlh
, sizeof(*tcm
));
492 tcm
->tcm_family
= AF_UNSPEC
;
493 tcm
->tcm_ifindex
= ptoi
[0].ifindex
;
495 * Let kernel pick a handle by default. A predictable handle
496 * can be set by the caller on the resulting buffer through
497 * mlx5_nl_flow_brand().
500 tcm
->tcm_parent
= TC_H_MAKE(TC_H_INGRESS
, TC_H_MIN_INGRESS
);
502 * Priority cannot be zero to prevent the kernel from
503 * picking one automatically.
505 tcm
->tcm_info
= TC_H_MAKE((attr
->priority
+ 1) << 16,
506 RTE_BE16(ETH_P_ALL
));
509 if (!mnl_attr_put_strz_check(buf
, size
, TCA_KIND
, "flower"))
511 na_flower
= mnl_attr_nest_start_check(buf
, size
, TCA_OPTIONS
);
514 if (!mnl_attr_put_u32_check(buf
, size
, TCA_FLOWER_FLAGS
,
515 TCA_CLS_FLAGS_SKIP_SW
))
519 if (item
->type
!= RTE_FLOW_ITEM_TYPE_VOID
)
524 if (item
->type
!= RTE_FLOW_ITEM_TYPE_PORT_ID
)
526 mask
.port_id
= mlx5_nl_flow_item_mask
527 (item
, &rte_flow_item_port_id_mask
,
528 &mlx5_nl_flow_mask_supported
.port_id
,
529 &mlx5_nl_flow_mask_empty
.port_id
,
530 sizeof(mlx5_nl_flow_mask_supported
.port_id
), error
);
533 if (mask
.port_id
== &mlx5_nl_flow_mask_empty
.port_id
) {
538 spec
.port_id
= item
->spec
;
539 if (mask
.port_id
->id
&& mask
.port_id
->id
!= 0xffffffff)
540 return rte_flow_error_set
541 (error
, ENOTSUP
, RTE_FLOW_ERROR_TYPE_ITEM_MASK
,
543 "no support for partial mask on"
545 if (!mask
.port_id
->id
)
548 for (i
= 0; ptoi
[i
].ifindex
; ++i
)
549 if (ptoi
[i
].port_id
== spec
.port_id
->id
)
551 if (!ptoi
[i
].ifindex
)
552 return rte_flow_error_set
553 (error
, ENODEV
, RTE_FLOW_ERROR_TYPE_ITEM_SPEC
,
555 "missing data to convert port ID to ifindex");
556 tcm
= mnl_nlmsg_get_payload(buf
);
557 if (in_port_id_set
&&
558 ptoi
[i
].ifindex
!= (unsigned int)tcm
->tcm_ifindex
)
559 return rte_flow_error_set
560 (error
, ENOTSUP
, RTE_FLOW_ERROR_TYPE_ITEM_SPEC
,
562 "cannot match traffic for several port IDs"
563 " through a single flow rule");
564 tcm
->tcm_ifindex
= ptoi
[i
].ifindex
;
569 if (item
->type
!= RTE_FLOW_ITEM_TYPE_ETH
)
571 mask
.eth
= mlx5_nl_flow_item_mask
572 (item
, &rte_flow_item_eth_mask
,
573 &mlx5_nl_flow_mask_supported
.eth
,
574 &mlx5_nl_flow_mask_empty
.eth
,
575 sizeof(mlx5_nl_flow_mask_supported
.eth
), error
);
578 if (mask
.eth
== &mlx5_nl_flow_mask_empty
.eth
) {
582 spec
.eth
= item
->spec
;
583 if (mask
.eth
->type
&& mask
.eth
->type
!= RTE_BE16(0xffff))
584 return rte_flow_error_set
585 (error
, ENOTSUP
, RTE_FLOW_ERROR_TYPE_ITEM_MASK
,
587 "no support for partial mask on"
589 if (mask
.eth
->type
) {
590 if (!mnl_attr_put_u16_check(buf
, size
,
591 TCA_FLOWER_KEY_ETH_TYPE
,
596 if ((!is_zero_ether_addr(&mask
.eth
->dst
) &&
597 (!mnl_attr_put_check(buf
, size
,
598 TCA_FLOWER_KEY_ETH_DST
,
600 spec
.eth
->dst
.addr_bytes
) ||
601 !mnl_attr_put_check(buf
, size
,
602 TCA_FLOWER_KEY_ETH_DST_MASK
,
604 mask
.eth
->dst
.addr_bytes
))) ||
605 (!is_zero_ether_addr(&mask
.eth
->src
) &&
606 (!mnl_attr_put_check(buf
, size
,
607 TCA_FLOWER_KEY_ETH_SRC
,
609 spec
.eth
->src
.addr_bytes
) ||
610 !mnl_attr_put_check(buf
, size
,
611 TCA_FLOWER_KEY_ETH_SRC_MASK
,
613 mask
.eth
->src
.addr_bytes
))))
618 if (item
->type
!= RTE_FLOW_ITEM_TYPE_VLAN
)
620 mask
.vlan
= mlx5_nl_flow_item_mask
621 (item
, &rte_flow_item_vlan_mask
,
622 &mlx5_nl_flow_mask_supported
.vlan
,
623 &mlx5_nl_flow_mask_empty
.vlan
,
624 sizeof(mlx5_nl_flow_mask_supported
.vlan
), error
);
628 !mnl_attr_put_u16_check(buf
, size
,
629 TCA_FLOWER_KEY_ETH_TYPE
,
630 RTE_BE16(ETH_P_8021Q
)))
634 if (mask
.vlan
== &mlx5_nl_flow_mask_empty
.vlan
) {
638 spec
.vlan
= item
->spec
;
639 if ((mask
.vlan
->tci
& RTE_BE16(0xe000) &&
640 (mask
.vlan
->tci
& RTE_BE16(0xe000)) != RTE_BE16(0xe000)) ||
641 (mask
.vlan
->tci
& RTE_BE16(0x0fff) &&
642 (mask
.vlan
->tci
& RTE_BE16(0x0fff)) != RTE_BE16(0x0fff)) ||
643 (mask
.vlan
->inner_type
&&
644 mask
.vlan
->inner_type
!= RTE_BE16(0xffff)))
645 return rte_flow_error_set
646 (error
, ENOTSUP
, RTE_FLOW_ERROR_TYPE_ITEM_MASK
,
648 "no support for partial masks on"
649 " \"tci\" (PCP and VID parts) and"
650 " \"inner_type\" fields");
651 if (mask
.vlan
->inner_type
) {
652 if (!mnl_attr_put_u16_check
653 (buf
, size
, TCA_FLOWER_KEY_VLAN_ETH_TYPE
,
654 spec
.vlan
->inner_type
))
656 vlan_eth_type_set
= 1;
658 if ((mask
.vlan
->tci
& RTE_BE16(0xe000) &&
659 !mnl_attr_put_u8_check
660 (buf
, size
, TCA_FLOWER_KEY_VLAN_PRIO
,
661 (rte_be_to_cpu_16(spec
.vlan
->tci
) >> 13) & 0x7)) ||
662 (mask
.vlan
->tci
& RTE_BE16(0x0fff) &&
663 !mnl_attr_put_u16_check
664 (buf
, size
, TCA_FLOWER_KEY_VLAN_ID
,
665 rte_be_to_cpu_16(spec
.vlan
->tci
& RTE_BE16(0x0fff)))))
670 if (item
->type
!= RTE_FLOW_ITEM_TYPE_IPV4
)
672 mask
.ipv4
= mlx5_nl_flow_item_mask
673 (item
, &rte_flow_item_ipv4_mask
,
674 &mlx5_nl_flow_mask_supported
.ipv4
,
675 &mlx5_nl_flow_mask_empty
.ipv4
,
676 sizeof(mlx5_nl_flow_mask_supported
.ipv4
), error
);
679 if ((!eth_type_set
|| !vlan_eth_type_set
) &&
680 !mnl_attr_put_u16_check(buf
, size
,
682 TCA_FLOWER_KEY_VLAN_ETH_TYPE
:
683 TCA_FLOWER_KEY_ETH_TYPE
,
687 vlan_eth_type_set
= 1;
688 if (mask
.ipv4
== &mlx5_nl_flow_mask_empty
.ipv4
) {
692 spec
.ipv4
= item
->spec
;
693 if (mask
.ipv4
->hdr
.next_proto_id
&&
694 mask
.ipv4
->hdr
.next_proto_id
!= 0xff)
695 return rte_flow_error_set
696 (error
, ENOTSUP
, RTE_FLOW_ERROR_TYPE_ITEM_MASK
,
698 "no support for partial mask on"
699 " \"hdr.next_proto_id\" field");
700 if (mask
.ipv4
->hdr
.next_proto_id
) {
701 if (!mnl_attr_put_u8_check
702 (buf
, size
, TCA_FLOWER_KEY_IP_PROTO
,
703 spec
.ipv4
->hdr
.next_proto_id
))
707 if ((mask
.ipv4
->hdr
.src_addr
&&
708 (!mnl_attr_put_u32_check(buf
, size
,
709 TCA_FLOWER_KEY_IPV4_SRC
,
710 spec
.ipv4
->hdr
.src_addr
) ||
711 !mnl_attr_put_u32_check(buf
, size
,
712 TCA_FLOWER_KEY_IPV4_SRC_MASK
,
713 mask
.ipv4
->hdr
.src_addr
))) ||
714 (mask
.ipv4
->hdr
.dst_addr
&&
715 (!mnl_attr_put_u32_check(buf
, size
,
716 TCA_FLOWER_KEY_IPV4_DST
,
717 spec
.ipv4
->hdr
.dst_addr
) ||
718 !mnl_attr_put_u32_check(buf
, size
,
719 TCA_FLOWER_KEY_IPV4_DST_MASK
,
720 mask
.ipv4
->hdr
.dst_addr
))))
725 if (item
->type
!= RTE_FLOW_ITEM_TYPE_IPV6
)
727 mask
.ipv6
= mlx5_nl_flow_item_mask
728 (item
, &rte_flow_item_ipv6_mask
,
729 &mlx5_nl_flow_mask_supported
.ipv6
,
730 &mlx5_nl_flow_mask_empty
.ipv6
,
731 sizeof(mlx5_nl_flow_mask_supported
.ipv6
), error
);
734 if ((!eth_type_set
|| !vlan_eth_type_set
) &&
735 !mnl_attr_put_u16_check(buf
, size
,
737 TCA_FLOWER_KEY_VLAN_ETH_TYPE
:
738 TCA_FLOWER_KEY_ETH_TYPE
,
739 RTE_BE16(ETH_P_IPV6
)))
742 vlan_eth_type_set
= 1;
743 if (mask
.ipv6
== &mlx5_nl_flow_mask_empty
.ipv6
) {
747 spec
.ipv6
= item
->spec
;
748 if (mask
.ipv6
->hdr
.proto
&& mask
.ipv6
->hdr
.proto
!= 0xff)
749 return rte_flow_error_set
750 (error
, ENOTSUP
, RTE_FLOW_ERROR_TYPE_ITEM_MASK
,
752 "no support for partial mask on"
753 " \"hdr.proto\" field");
754 if (mask
.ipv6
->hdr
.proto
) {
755 if (!mnl_attr_put_u8_check
756 (buf
, size
, TCA_FLOWER_KEY_IP_PROTO
,
757 spec
.ipv6
->hdr
.proto
))
761 if ((!IN6_IS_ADDR_UNSPECIFIED(mask
.ipv6
->hdr
.src_addr
) &&
762 (!mnl_attr_put_check(buf
, size
,
763 TCA_FLOWER_KEY_IPV6_SRC
,
764 sizeof(spec
.ipv6
->hdr
.src_addr
),
765 spec
.ipv6
->hdr
.src_addr
) ||
766 !mnl_attr_put_check(buf
, size
,
767 TCA_FLOWER_KEY_IPV6_SRC_MASK
,
768 sizeof(mask
.ipv6
->hdr
.src_addr
),
769 mask
.ipv6
->hdr
.src_addr
))) ||
770 (!IN6_IS_ADDR_UNSPECIFIED(mask
.ipv6
->hdr
.dst_addr
) &&
771 (!mnl_attr_put_check(buf
, size
,
772 TCA_FLOWER_KEY_IPV6_DST
,
773 sizeof(spec
.ipv6
->hdr
.dst_addr
),
774 spec
.ipv6
->hdr
.dst_addr
) ||
775 !mnl_attr_put_check(buf
, size
,
776 TCA_FLOWER_KEY_IPV6_DST_MASK
,
777 sizeof(mask
.ipv6
->hdr
.dst_addr
),
778 mask
.ipv6
->hdr
.dst_addr
))))
783 if (item
->type
!= RTE_FLOW_ITEM_TYPE_TCP
)
785 mask
.tcp
= mlx5_nl_flow_item_mask
786 (item
, &rte_flow_item_tcp_mask
,
787 &mlx5_nl_flow_mask_supported
.tcp
,
788 &mlx5_nl_flow_mask_empty
.tcp
,
789 sizeof(mlx5_nl_flow_mask_supported
.tcp
), error
);
793 !mnl_attr_put_u8_check(buf
, size
,
794 TCA_FLOWER_KEY_IP_PROTO
,
797 if (mask
.tcp
== &mlx5_nl_flow_mask_empty
.tcp
) {
801 spec
.tcp
= item
->spec
;
802 if ((mask
.tcp
->hdr
.src_port
&&
803 mask
.tcp
->hdr
.src_port
!= RTE_BE16(0xffff)) ||
804 (mask
.tcp
->hdr
.dst_port
&&
805 mask
.tcp
->hdr
.dst_port
!= RTE_BE16(0xffff)))
806 return rte_flow_error_set
807 (error
, ENOTSUP
, RTE_FLOW_ERROR_TYPE_ITEM_MASK
,
809 "no support for partial masks on"
810 " \"hdr.src_port\" and \"hdr.dst_port\""
812 if ((mask
.tcp
->hdr
.src_port
&&
813 (!mnl_attr_put_u16_check(buf
, size
,
814 TCA_FLOWER_KEY_TCP_SRC
,
815 spec
.tcp
->hdr
.src_port
) ||
816 !mnl_attr_put_u16_check(buf
, size
,
817 TCA_FLOWER_KEY_TCP_SRC_MASK
,
818 mask
.tcp
->hdr
.src_port
))) ||
819 (mask
.tcp
->hdr
.dst_port
&&
820 (!mnl_attr_put_u16_check(buf
, size
,
821 TCA_FLOWER_KEY_TCP_DST
,
822 spec
.tcp
->hdr
.dst_port
) ||
823 !mnl_attr_put_u16_check(buf
, size
,
824 TCA_FLOWER_KEY_TCP_DST_MASK
,
825 mask
.tcp
->hdr
.dst_port
))))
830 if (item
->type
!= RTE_FLOW_ITEM_TYPE_UDP
)
832 mask
.udp
= mlx5_nl_flow_item_mask
833 (item
, &rte_flow_item_udp_mask
,
834 &mlx5_nl_flow_mask_supported
.udp
,
835 &mlx5_nl_flow_mask_empty
.udp
,
836 sizeof(mlx5_nl_flow_mask_supported
.udp
), error
);
840 !mnl_attr_put_u8_check(buf
, size
,
841 TCA_FLOWER_KEY_IP_PROTO
,
844 if (mask
.udp
== &mlx5_nl_flow_mask_empty
.udp
) {
848 spec
.udp
= item
->spec
;
849 if ((mask
.udp
->hdr
.src_port
&&
850 mask
.udp
->hdr
.src_port
!= RTE_BE16(0xffff)) ||
851 (mask
.udp
->hdr
.dst_port
&&
852 mask
.udp
->hdr
.dst_port
!= RTE_BE16(0xffff)))
853 return rte_flow_error_set
854 (error
, ENOTSUP
, RTE_FLOW_ERROR_TYPE_ITEM_MASK
,
856 "no support for partial masks on"
857 " \"hdr.src_port\" and \"hdr.dst_port\""
859 if ((mask
.udp
->hdr
.src_port
&&
860 (!mnl_attr_put_u16_check(buf
, size
,
861 TCA_FLOWER_KEY_UDP_SRC
,
862 spec
.udp
->hdr
.src_port
) ||
863 !mnl_attr_put_u16_check(buf
, size
,
864 TCA_FLOWER_KEY_UDP_SRC_MASK
,
865 mask
.udp
->hdr
.src_port
))) ||
866 (mask
.udp
->hdr
.dst_port
&&
867 (!mnl_attr_put_u16_check(buf
, size
,
868 TCA_FLOWER_KEY_UDP_DST
,
869 spec
.udp
->hdr
.dst_port
) ||
870 !mnl_attr_put_u16_check(buf
, size
,
871 TCA_FLOWER_KEY_UDP_DST_MASK
,
872 mask
.udp
->hdr
.dst_port
))))
877 if (item
->type
!= RTE_FLOW_ITEM_TYPE_END
)
880 assert(!na_flower_act
);
882 mnl_attr_nest_start_check(buf
, size
, TCA_FLOWER_ACT
);
888 if (action
->type
!= RTE_FLOW_ACTION_TYPE_VOID
)
893 if (action
->type
!= RTE_FLOW_ACTION_TYPE_PORT_ID
)
895 conf
.port_id
= action
->conf
;
896 if (conf
.port_id
->original
)
899 for (i
= 0; ptoi
[i
].ifindex
; ++i
)
900 if (ptoi
[i
].port_id
== conf
.port_id
->id
)
902 if (!ptoi
[i
].ifindex
)
903 return rte_flow_error_set
904 (error
, ENODEV
, RTE_FLOW_ERROR_TYPE_ACTION_CONF
,
906 "missing data to convert port ID to ifindex");
908 mnl_attr_nest_start_check(buf
, size
, act_index_cur
++);
910 !mnl_attr_put_strz_check(buf
, size
, TCA_ACT_KIND
, "mirred"))
912 act
= mnl_attr_nest_start_check(buf
, size
, TCA_ACT_OPTIONS
);
915 if (!mnl_attr_put_check(buf
, size
, TCA_MIRRED_PARMS
,
916 sizeof(struct tc_mirred
),
918 .action
= TC_ACT_STOLEN
,
919 .eaction
= TCA_EGRESS_REDIR
,
920 .ifindex
= ptoi
[i
].ifindex
,
923 mnl_attr_nest_end(buf
, act
);
924 mnl_attr_nest_end(buf
, act_index
);
928 if (action
->type
!= RTE_FLOW_ACTION_TYPE_DROP
)
931 mnl_attr_nest_start_check(buf
, size
, act_index_cur
++);
933 !mnl_attr_put_strz_check(buf
, size
, TCA_ACT_KIND
, "gact"))
935 act
= mnl_attr_nest_start_check(buf
, size
, TCA_ACT_OPTIONS
);
938 if (!mnl_attr_put_check(buf
, size
, TCA_GACT_PARMS
,
939 sizeof(struct tc_gact
),
941 .action
= TC_ACT_SHOT
,
944 mnl_attr_nest_end(buf
, act
);
945 mnl_attr_nest_end(buf
, act_index
);
948 case ACTION_OF_POP_VLAN
:
949 if (action
->type
!= RTE_FLOW_ACTION_TYPE_OF_POP_VLAN
)
951 conf
.of_push_vlan
= NULL
;
952 i
= TCA_VLAN_ACT_POP
;
954 case ACTION_OF_PUSH_VLAN
:
955 if (action
->type
!= RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN
)
957 conf
.of_push_vlan
= action
->conf
;
958 i
= TCA_VLAN_ACT_PUSH
;
960 case ACTION_OF_SET_VLAN_VID
:
961 if (action
->type
!= RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID
)
963 conf
.of_set_vlan_vid
= action
->conf
;
965 goto override_na_vlan_id
;
966 i
= TCA_VLAN_ACT_MODIFY
;
968 case ACTION_OF_SET_VLAN_PCP
:
969 if (action
->type
!= RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP
)
971 conf
.of_set_vlan_pcp
= action
->conf
;
972 if (na_vlan_priority
)
973 goto override_na_vlan_priority
;
974 i
= TCA_VLAN_ACT_MODIFY
;
978 mnl_attr_nest_start_check(buf
, size
, act_index_cur
++);
980 !mnl_attr_put_strz_check(buf
, size
, TCA_ACT_KIND
, "vlan"))
982 act
= mnl_attr_nest_start_check(buf
, size
, TCA_ACT_OPTIONS
);
985 if (!mnl_attr_put_check(buf
, size
, TCA_VLAN_PARMS
,
986 sizeof(struct tc_vlan
),
988 .action
= TC_ACT_PIPE
,
992 if (i
== TCA_VLAN_ACT_POP
) {
993 mnl_attr_nest_end(buf
, act
);
994 mnl_attr_nest_end(buf
, act_index
);
998 if (i
== TCA_VLAN_ACT_PUSH
&&
999 !mnl_attr_put_u16_check(buf
, size
,
1000 TCA_VLAN_PUSH_VLAN_PROTOCOL
,
1001 conf
.of_push_vlan
->ethertype
))
1003 na_vlan_id
= mnl_nlmsg_get_payload_tail(buf
);
1004 if (!mnl_attr_put_u16_check(buf
, size
, TCA_VLAN_PAD
, 0))
1006 na_vlan_priority
= mnl_nlmsg_get_payload_tail(buf
);
1007 if (!mnl_attr_put_u8_check(buf
, size
, TCA_VLAN_PAD
, 0))
1009 mnl_attr_nest_end(buf
, act
);
1010 mnl_attr_nest_end(buf
, act_index
);
1011 if (action
->type
== RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID
) {
1012 override_na_vlan_id
:
1013 na_vlan_id
->nla_type
= TCA_VLAN_PUSH_VLAN_ID
;
1014 *(uint16_t *)mnl_attr_get_payload(na_vlan_id
) =
1016 (conf
.of_set_vlan_vid
->vlan_vid
);
1017 } else if (action
->type
==
1018 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP
) {
1019 override_na_vlan_priority
:
1020 na_vlan_priority
->nla_type
=
1021 TCA_VLAN_PUSH_VLAN_PRIORITY
;
1022 *(uint8_t *)mnl_attr_get_payload(na_vlan_priority
) =
1023 conf
.of_set_vlan_pcp
->vlan_pcp
;
1028 if (item
->type
!= RTE_FLOW_ITEM_TYPE_END
||
1029 action
->type
!= RTE_FLOW_ACTION_TYPE_END
)
1032 mnl_attr_nest_end(buf
, na_flower_act
);
1034 mnl_attr_nest_end(buf
, na_flower
);
1036 return nlh
->nlmsg_len
;
1039 trans
= mlx5_nl_flow_trans
[trans
[n
- 1]];
1043 if (buf
!= buf_tmp
) {
1045 size
= sizeof(buf_tmp
);
1048 return rte_flow_error_set
1049 (error
, ENOBUFS
, RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
1050 "generated TC message is too large");
1054 * Brand rtnetlink buffer with unique handle.
1056 * This handle should be unique for a given network interface to avoid
1060 * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
1062 * Unique 32-bit handle to use.
1065 mlx5_nl_flow_brand(void *buf
, uint32_t handle
)
1067 struct tcmsg
*tcm
= mnl_nlmsg_get_payload(buf
);
1069 tcm
->tcm_handle
= handle
;
1073 * Send Netlink message with acknowledgment.
1076 * Libmnl socket to use.
1078 * Message to send. This function always raises the NLM_F_ACK flag before
1082 * 0 on success, a negative errno value otherwise and rte_errno is set.
1085 mlx5_nl_flow_nl_ack(struct mnl_socket
*nl
, struct nlmsghdr
*nlh
)
1087 alignas(struct nlmsghdr
)
1088 uint8_t ans
[mnl_nlmsg_size(sizeof(struct nlmsgerr
)) +
1089 nlh
->nlmsg_len
- sizeof(*nlh
)];
1090 uint32_t seq
= random();
1093 nlh
->nlmsg_flags
|= NLM_F_ACK
;
1094 nlh
->nlmsg_seq
= seq
;
1095 ret
= mnl_socket_sendto(nl
, nlh
, nlh
->nlmsg_len
);
1097 ret
= mnl_socket_recvfrom(nl
, ans
, sizeof(ans
));
1100 (ans
, ret
, seq
, mnl_socket_get_portid(nl
), NULL
, NULL
);
1108 * Create a Netlink flow rule.
1111 * Libmnl socket to use.
1113 * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
1115 * Perform verbose error reporting if not NULL.
1118 * 0 on success, a negative errno value otherwise and rte_errno is set.
1121 mlx5_nl_flow_create(struct mnl_socket
*nl
, void *buf
,
1122 struct rte_flow_error
*error
)
1124 struct nlmsghdr
*nlh
= buf
;
1126 nlh
->nlmsg_type
= RTM_NEWTFILTER
;
1127 nlh
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
1128 if (!mlx5_nl_flow_nl_ack(nl
, nlh
))
1130 return rte_flow_error_set
1131 (error
, rte_errno
, RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
1132 "netlink: failed to create TC flow rule");
1136 * Destroy a Netlink flow rule.
1139 * Libmnl socket to use.
1141 * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
1143 * Perform verbose error reporting if not NULL.
1146 * 0 on success, a negative errno value otherwise and rte_errno is set.
1149 mlx5_nl_flow_destroy(struct mnl_socket
*nl
, void *buf
,
1150 struct rte_flow_error
*error
)
1152 struct nlmsghdr
*nlh
= buf
;
1154 nlh
->nlmsg_type
= RTM_DELTFILTER
;
1155 nlh
->nlmsg_flags
= NLM_F_REQUEST
;
1156 if (!mlx5_nl_flow_nl_ack(nl
, nlh
))
1158 return rte_flow_error_set
1159 (error
, errno
, RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
1160 "netlink: failed to destroy TC flow rule");
1164 * Initialize ingress qdisc of a given network interface.
1167 * Libmnl socket of the @p NETLINK_ROUTE kind.
1169 * Index of network interface to initialize.
1171 * Perform verbose error reporting if not NULL.
1174 * 0 on success, a negative errno value otherwise and rte_errno is set.
1177 mlx5_nl_flow_init(struct mnl_socket
*nl
, unsigned int ifindex
,
1178 struct rte_flow_error
*error
)
1180 struct nlmsghdr
*nlh
;
1182 alignas(struct nlmsghdr
)
1183 uint8_t buf
[mnl_nlmsg_size(sizeof(*tcm
) + 128)];
1185 /* Destroy existing ingress qdisc and everything attached to it. */
1186 nlh
= mnl_nlmsg_put_header(buf
);
1187 nlh
->nlmsg_type
= RTM_DELQDISC
;
1188 nlh
->nlmsg_flags
= NLM_F_REQUEST
;
1189 tcm
= mnl_nlmsg_put_extra_header(nlh
, sizeof(*tcm
));
1190 tcm
->tcm_family
= AF_UNSPEC
;
1191 tcm
->tcm_ifindex
= ifindex
;
1192 tcm
->tcm_handle
= TC_H_MAKE(TC_H_INGRESS
, 0);
1193 tcm
->tcm_parent
= TC_H_INGRESS
;
1194 /* Ignore errors when qdisc is already absent. */
1195 if (mlx5_nl_flow_nl_ack(nl
, nlh
) &&
1196 rte_errno
!= EINVAL
&& rte_errno
!= ENOENT
)
1197 return rte_flow_error_set
1198 (error
, rte_errno
, RTE_FLOW_ERROR_TYPE_UNSPECIFIED
,
1199 NULL
, "netlink: failed to remove ingress qdisc");
1200 /* Create fresh ingress qdisc. */
1201 nlh
= mnl_nlmsg_put_header(buf
);
1202 nlh
->nlmsg_type
= RTM_NEWQDISC
;
1203 nlh
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
1204 tcm
= mnl_nlmsg_put_extra_header(nlh
, sizeof(*tcm
));
1205 tcm
->tcm_family
= AF_UNSPEC
;
1206 tcm
->tcm_ifindex
= ifindex
;
1207 tcm
->tcm_handle
= TC_H_MAKE(TC_H_INGRESS
, 0);
1208 tcm
->tcm_parent
= TC_H_INGRESS
;
1209 mnl_attr_put_strz_check(nlh
, sizeof(buf
), TCA_KIND
, "ingress");
1210 if (mlx5_nl_flow_nl_ack(nl
, nlh
))
1211 return rte_flow_error_set
1212 (error
, rte_errno
, RTE_FLOW_ERROR_TYPE_UNSPECIFIED
,
1213 NULL
, "netlink: failed to create ingress qdisc");
1218 * Create and configure a libmnl socket for Netlink flow rules.
1221 * A valid libmnl socket object pointer on success, NULL otherwise and
1225 mlx5_nl_flow_socket_create(void)
1227 struct mnl_socket
*nl
= mnl_socket_open(NETLINK_ROUTE
);
1230 mnl_socket_setsockopt(nl
, NETLINK_CAP_ACK
, &(int){ 1 },
1232 if (!mnl_socket_bind(nl
, 0, MNL_SOCKET_AUTOPID
))
1237 mnl_socket_close(nl
);
/**
 * Destroy a libmnl socket.
 *
 * @param nl
 *   Libmnl socket to close; must have been returned by
 *   mlx5_nl_flow_socket_create().
 */
void
mlx5_nl_flow_socket_destroy(struct mnl_socket *nl)
{
	mnl_socket_close(nl);
}