1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
8 #include <libmnl/libmnl.h>
9 #include <linux/gen_stats.h>
10 #include <linux/if_ether.h>
11 #include <linux/netlink.h>
12 #include <linux/pkt_cls.h>
13 #include <linux/pkt_sched.h>
14 #include <linux/rtnetlink.h>
15 #include <linux/tc_act/tc_gact.h>
16 #include <linux/tc_act/tc_mirred.h>
17 #include <netinet/in.h>
23 #include <sys/socket.h>
25 #include <rte_byteorder.h>
26 #include <rte_errno.h>
27 #include <rte_ether.h>
29 #include <rte_malloc.h>
30 #include <rte_common.h>
31 #include <rte_cycles.h>
34 #include "mlx5_flow.h"
35 #include "mlx5_autoconf.h"
37 #ifdef HAVE_TC_ACT_VLAN
39 #include <linux/tc_act/tc_vlan.h>
41 #else /* HAVE_TC_ACT_VLAN */
43 #define TCA_VLAN_ACT_POP 1
44 #define TCA_VLAN_ACT_PUSH 2
45 #define TCA_VLAN_ACT_MODIFY 3
46 #define TCA_VLAN_PARMS 2
47 #define TCA_VLAN_PUSH_VLAN_ID 3
48 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
49 #define TCA_VLAN_PAD 5
50 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
57 #endif /* HAVE_TC_ACT_VLAN */
59 #ifdef HAVE_TC_ACT_PEDIT
61 #include <linux/tc_act/tc_pedit.h>
63 #else /* HAVE_TC_ACT_VLAN */
77 TCA_PEDIT_KEY_EX_HTYPE
= 1,
78 TCA_PEDIT_KEY_EX_CMD
= 2,
79 __TCA_PEDIT_KEY_EX_MAX
82 enum pedit_header_type
{
83 TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK
= 0,
84 TCA_PEDIT_KEY_EX_HDR_TYPE_ETH
= 1,
85 TCA_PEDIT_KEY_EX_HDR_TYPE_IP4
= 2,
86 TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
= 3,
87 TCA_PEDIT_KEY_EX_HDR_TYPE_TCP
= 4,
88 TCA_PEDIT_KEY_EX_HDR_TYPE_UDP
= 5,
93 TCA_PEDIT_KEY_EX_CMD_SET
= 0,
94 TCA_PEDIT_KEY_EX_CMD_ADD
= 1,
101 __u32 off
; /*offset */
108 struct tc_pedit_sel
{
112 struct tc_pedit_key keys
[0];
115 #endif /* HAVE_TC_ACT_VLAN */
117 #ifdef HAVE_TC_ACT_TUNNEL_KEY
119 #include <linux/tc_act/tc_tunnel_key.h>
121 #ifndef HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT
122 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
125 #ifndef HAVE_TCA_TUNNEL_KEY_NO_CSUM
126 #define TCA_TUNNEL_KEY_NO_CSUM 10
129 #ifndef HAVE_TCA_TUNNEL_KEY_ENC_TOS
130 #define TCA_TUNNEL_KEY_ENC_TOS 12
133 #ifndef HAVE_TCA_TUNNEL_KEY_ENC_TTL
134 #define TCA_TUNNEL_KEY_ENC_TTL 13
137 #else /* HAVE_TC_ACT_TUNNEL_KEY */
139 #define TCA_ACT_TUNNEL_KEY 17
140 #define TCA_TUNNEL_KEY_ACT_SET 1
141 #define TCA_TUNNEL_KEY_ACT_RELEASE 2
142 #define TCA_TUNNEL_KEY_PARMS 2
143 #define TCA_TUNNEL_KEY_ENC_IPV4_SRC 3
144 #define TCA_TUNNEL_KEY_ENC_IPV4_DST 4
145 #define TCA_TUNNEL_KEY_ENC_IPV6_SRC 5
146 #define TCA_TUNNEL_KEY_ENC_IPV6_DST 6
147 #define TCA_TUNNEL_KEY_ENC_KEY_ID 7
148 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
149 #define TCA_TUNNEL_KEY_NO_CSUM 10
150 #define TCA_TUNNEL_KEY_ENC_TOS 12
151 #define TCA_TUNNEL_KEY_ENC_TTL 13
153 struct tc_tunnel_key
{
158 #endif /* HAVE_TC_ACT_TUNNEL_KEY */
160 /* Normally found in linux/netlink.h. */
161 #ifndef NETLINK_CAP_ACK
162 #define NETLINK_CAP_ACK 10
165 /* Normally found in linux/pkt_sched.h. */
166 #ifndef TC_H_MIN_INGRESS
167 #define TC_H_MIN_INGRESS 0xfff2u
170 /* Normally found in linux/pkt_cls.h. */
171 #ifndef TCA_CLS_FLAGS_SKIP_SW
172 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
174 #ifndef TCA_CLS_FLAGS_IN_HW
175 #define TCA_CLS_FLAGS_IN_HW (1 << 2)
177 #ifndef HAVE_TCA_CHAIN
180 #ifndef HAVE_TCA_FLOWER_ACT
181 #define TCA_FLOWER_ACT 3
183 #ifndef HAVE_TCA_FLOWER_FLAGS
184 #define TCA_FLOWER_FLAGS 22
186 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
187 #define TCA_FLOWER_KEY_ETH_TYPE 8
189 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
190 #define TCA_FLOWER_KEY_ETH_DST 4
192 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
193 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
195 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
196 #define TCA_FLOWER_KEY_ETH_SRC 6
198 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
199 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
201 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
202 #define TCA_FLOWER_KEY_IP_PROTO 9
204 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
205 #define TCA_FLOWER_KEY_IPV4_SRC 10
207 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
208 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
210 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
211 #define TCA_FLOWER_KEY_IPV4_DST 12
213 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
214 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
216 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
217 #define TCA_FLOWER_KEY_IPV6_SRC 14
219 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
220 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
222 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
223 #define TCA_FLOWER_KEY_IPV6_DST 16
225 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
226 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
228 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
229 #define TCA_FLOWER_KEY_TCP_SRC 18
231 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
232 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
234 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
235 #define TCA_FLOWER_KEY_TCP_DST 19
237 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
238 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
240 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
241 #define TCA_FLOWER_KEY_UDP_SRC 20
243 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
244 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
246 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
247 #define TCA_FLOWER_KEY_UDP_DST 21
249 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
250 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
252 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
253 #define TCA_FLOWER_KEY_VLAN_ID 23
255 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
256 #define TCA_FLOWER_KEY_VLAN_PRIO 24
258 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
259 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
261 #ifndef HAVE_TCA_FLOWER_KEY_ENC_KEY_ID
262 #define TCA_FLOWER_KEY_ENC_KEY_ID 26
264 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC
265 #define TCA_FLOWER_KEY_ENC_IPV4_SRC 27
267 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK
268 #define TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK 28
270 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST
271 #define TCA_FLOWER_KEY_ENC_IPV4_DST 29
273 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK
274 #define TCA_FLOWER_KEY_ENC_IPV4_DST_MASK 30
276 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC
277 #define TCA_FLOWER_KEY_ENC_IPV6_SRC 31
279 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK
280 #define TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK 32
282 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST
283 #define TCA_FLOWER_KEY_ENC_IPV6_DST 33
285 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK
286 #define TCA_FLOWER_KEY_ENC_IPV6_DST_MASK 34
288 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT
289 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT 43
291 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK
292 #define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK 44
294 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT
295 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT 45
297 #ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK
298 #define TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK 46
300 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
301 #define TCA_FLOWER_KEY_TCP_FLAGS 71
303 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
304 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
306 #ifndef HAVE_TCA_FLOWER_KEY_IP_TOS
307 #define TCA_FLOWER_KEY_IP_TOS 73
309 #ifndef HAVE_TCA_FLOWER_KEY_IP_TOS_MASK
310 #define TCA_FLOWER_KEY_IP_TOS_MASK 74
312 #ifndef HAVE_TCA_FLOWER_KEY_IP_TTL
313 #define TCA_FLOWER_KEY_IP_TTL 75
315 #ifndef HAVE_TCA_FLOWER_KEY_IP_TTL_MASK
316 #define TCA_FLOWER_KEY_IP_TTL_MASK 76
318 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IP_TOS
319 #define TCA_FLOWER_KEY_ENC_IP_TOS 80
321 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IP_TOS_MASK
322 #define TCA_FLOWER_KEY_ENC_IP_TOS_MASK 81
324 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IP_TTL
325 #define TCA_FLOWER_KEY_ENC_IP_TTL 82
327 #ifndef HAVE_TCA_FLOWER_KEY_ENC_IP_TTL_MASK
328 #define TCA_FLOWER_KEY_ENC_IP_TTL_MASK 83
331 #ifndef HAVE_TC_ACT_GOTO_CHAIN
332 #define TC_ACT_GOTO_CHAIN 0x20000000
335 #ifndef IPV6_ADDR_LEN
336 #define IPV6_ADDR_LEN 16
339 #ifndef IPV4_ADDR_LEN
340 #define IPV4_ADDR_LEN 4
344 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
351 #ifndef TCA_ACT_MAX_PRIO
352 #define TCA_ACT_MAX_PRIO 32
355 /** Parameters of VXLAN devices created by driver. */
356 #define MLX5_VXLAN_DEFAULT_VNI 1
357 #define MLX5_VXLAN_DEVICE_PFX "vmlx_"
359 * Timeout in milliseconds to wait VXLAN UDP offloaded port
360 * registration completed within the mlx5 driver.
362 #define MLX5_VXLAN_WAIT_PORT_REG_MS 250
364 /** Tunnel action type, used for @p type in header structure. */
365 enum flow_tcf_tunact_type
{
366 FLOW_TCF_TUNACT_VXLAN_DECAP
,
367 FLOW_TCF_TUNACT_VXLAN_ENCAP
,
370 /** Flags used for @p mask in tunnel action encap descriptors. */
371 #define FLOW_TCF_ENCAP_ETH_SRC (1u << 0)
372 #define FLOW_TCF_ENCAP_ETH_DST (1u << 1)
373 #define FLOW_TCF_ENCAP_IPV4_SRC (1u << 2)
374 #define FLOW_TCF_ENCAP_IPV4_DST (1u << 3)
375 #define FLOW_TCF_ENCAP_IPV6_SRC (1u << 4)
376 #define FLOW_TCF_ENCAP_IPV6_DST (1u << 5)
377 #define FLOW_TCF_ENCAP_UDP_SRC (1u << 6)
378 #define FLOW_TCF_ENCAP_UDP_DST (1u << 7)
379 #define FLOW_TCF_ENCAP_VXLAN_VNI (1u << 8)
380 #define FLOW_TCF_ENCAP_IP_TTL (1u << 9)
381 #define FLOW_TCF_ENCAP_IP_TOS (1u << 10)
/**
 * Structure for holding netlink context.
 * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
 * Using this (8KB) buffer size ensures that netlink messages will never be
 * truncated.
 */
struct mlx5_flow_tcf_context {
	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
	uint32_t seq; /* Message sequence number. */
	uint32_t buf_size; /* Message buffer size. */
	uint8_t *buf; /* Message buffer. */
};
397 * Neigh rule structure. The neigh rule is applied via Netlink to
398 * outer tunnel iface in order to provide destination MAC address
399 * for the VXLAN encapsultion. The neigh rule is implicitly related
400 * to the Flow itself and can be shared by multiple Flows.
402 struct tcf_neigh_rule
{
403 LIST_ENTRY(tcf_neigh_rule
) next
;
405 struct ether_addr eth
;
412 uint8_t dst
[IPV6_ADDR_LEN
];
418 * Local rule structure. The local rule is applied via Netlink to
419 * outer tunnel iface in order to provide local and peer IP addresses
420 * of the VXLAN tunnel for encapsulation. The local rule is implicitly
421 * related to the Flow itself and can be shared by multiple Flows.
423 struct tcf_local_rule
{
424 LIST_ENTRY(tcf_local_rule
) next
;
433 uint8_t dst
[IPV6_ADDR_LEN
];
434 uint8_t src
[IPV6_ADDR_LEN
];
439 /** Outer interface VXLAN encapsulation rules container. */
441 LIST_ENTRY(tcf_irule
) next
;
442 LIST_HEAD(, tcf_neigh_rule
) neigh
;
443 LIST_HEAD(, tcf_local_rule
) local
;
445 unsigned int ifouter
; /**< Own interface index. */
448 /** VXLAN virtual netdev. */
450 LIST_ENTRY(tcf_vtep
) next
;
452 unsigned int ifindex
; /**< Own interface index. */
454 uint32_t created
:1; /**< Actually created by PMD. */
455 uint32_t waitreg
:1; /**< Wait for VXLAN UDP port registration. */
/** Tunnel descriptor header, common for all tunnel types. */
struct flow_tcf_tunnel_hdr {
	uint32_t type; /**< Tunnel action type. */
	struct tcf_vtep *vtep; /**< Virtual tunnel endpoint device. */
	unsigned int ifindex_org; /**< Original dst/src interface */
	unsigned int *ifindex_ptr; /**< Interface ptr in message. */
};
466 struct flow_tcf_vxlan_decap
{
467 struct flow_tcf_tunnel_hdr hdr
;
471 struct flow_tcf_vxlan_encap
{
472 struct flow_tcf_tunnel_hdr hdr
;
473 struct tcf_irule
*iface
;
478 struct ether_addr dst
;
479 struct ether_addr src
;
487 uint8_t dst
[IPV6_ADDR_LEN
];
488 uint8_t src
[IPV6_ADDR_LEN
];
500 /** Structure used when extracting the values of a flow counters
501 * from a netlink message.
503 struct flow_tcf_stats_basic
{
505 struct gnet_stats_basic counters
;
508 /** Empty masks for known item types. */
510 struct rte_flow_item_port_id port_id
;
511 struct rte_flow_item_eth eth
;
512 struct rte_flow_item_vlan vlan
;
513 struct rte_flow_item_ipv4 ipv4
;
514 struct rte_flow_item_ipv6 ipv6
;
515 struct rte_flow_item_tcp tcp
;
516 struct rte_flow_item_udp udp
;
517 struct rte_flow_item_vxlan vxlan
;
518 } flow_tcf_mask_empty
= {
522 /** Supported masks for known item types. */
523 static const struct {
524 struct rte_flow_item_port_id port_id
;
525 struct rte_flow_item_eth eth
;
526 struct rte_flow_item_vlan vlan
;
527 struct rte_flow_item_ipv4 ipv4
;
528 struct rte_flow_item_ipv6 ipv6
;
529 struct rte_flow_item_tcp tcp
;
530 struct rte_flow_item_udp udp
;
531 struct rte_flow_item_vxlan vxlan
;
532 } flow_tcf_mask_supported
= {
537 .type
= RTE_BE16(0xffff),
538 .dst
.addr_bytes
= "\xff\xff\xff\xff\xff\xff",
539 .src
.addr_bytes
= "\xff\xff\xff\xff\xff\xff",
542 /* PCP and VID only, no DEI. */
543 .tci
= RTE_BE16(0xefff),
544 .inner_type
= RTE_BE16(0xffff),
547 .next_proto_id
= 0xff,
548 .time_to_live
= 0xff,
549 .type_of_service
= 0xff,
550 .src_addr
= RTE_BE32(0xffffffff),
551 .dst_addr
= RTE_BE32(0xffffffff),
555 .vtc_flow
= RTE_BE32(0xfful
<< IPV6_HDR_FL_SHIFT
),
558 "\xff\xff\xff\xff\xff\xff\xff\xff"
559 "\xff\xff\xff\xff\xff\xff\xff\xff",
561 "\xff\xff\xff\xff\xff\xff\xff\xff"
562 "\xff\xff\xff\xff\xff\xff\xff\xff",
565 .src_port
= RTE_BE16(0xffff),
566 .dst_port
= RTE_BE16(0xffff),
570 .src_port
= RTE_BE16(0xffff),
571 .dst_port
= RTE_BE16(0xffff),
574 .vni
= "\xff\xff\xff",
578 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
579 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
580 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
581 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
582 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
584 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
/** DPDK port to network interface index (ifindex) conversion. */
struct flow_tcf_ptoi {
	uint16_t port_id; /**< DPDK port ID. */
	unsigned int ifindex; /**< Network interface index. */
};
592 /* Due to a limitation on driver/FW. */
593 #define MLX5_TCF_GROUP_ID_MAX 3
596 * Due to a limitation on driver/FW, priority ranges from 1 to 16 in kernel.
597 * Priority in rte_flow attribute starts from 0 and is added by 1 in
598 * translation. This is subject to be changed to determine the max priority
599 * based on trial-and-error like Verbs driver once the restriction is lifted or
600 * the range is extended.
602 #define MLX5_TCF_GROUP_PRIORITY_MAX 15
604 #define MLX5_TCF_FATE_ACTIONS \
605 (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
606 MLX5_FLOW_ACTION_JUMP)
608 #define MLX5_TCF_VLAN_ACTIONS \
609 (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
610 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
612 #define MLX5_TCF_VXLAN_ACTIONS \
613 (MLX5_FLOW_ACTION_VXLAN_ENCAP | MLX5_FLOW_ACTION_VXLAN_DECAP)
615 #define MLX5_TCF_PEDIT_ACTIONS \
616 (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
617 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
618 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
619 MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
620 MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
622 #define MLX5_TCF_CONFIG_ACTIONS \
623 (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
624 MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
625 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
626 (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
628 #define MAX_PEDIT_KEYS 128
629 #define SZ_PEDIT_KEY_VAL 4
631 #define NUM_OF_PEDIT_KEYS(sz) \
632 (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
634 struct pedit_key_ex
{
635 enum pedit_header_type htype
;
639 struct pedit_parser
{
640 struct tc_pedit_sel sel
;
641 struct tc_pedit_key keys
[MAX_PEDIT_KEYS
];
642 struct pedit_key_ex keys_ex
[MAX_PEDIT_KEYS
];
646 * Create space for using the implicitly created TC flow counter.
649 * Pointer to the Ethernet device structure.
652 * A pointer to the counter data structure, NULL otherwise and
655 static struct mlx5_flow_counter
*
656 flow_tcf_counter_new(void)
658 struct mlx5_flow_counter
*cnt
;
661 * eswitch counter cannot be shared and its id is unknown.
662 * currently returning all with id 0.
663 * in the future maybe better to switch to unique numbers.
665 struct mlx5_flow_counter tmpl
= {
668 cnt
= rte_calloc(__func__
, 1, sizeof(*cnt
), 0);
674 /* Implicit counter, do not add to list. */
679 * Set pedit key of MAC address
682 * pointer to action specification
683 * @param[in,out] p_parser
684 * pointer to pedit_parser
687 flow_tcf_pedit_key_set_mac(const struct rte_flow_action
*actions
,
688 struct pedit_parser
*p_parser
)
690 int idx
= p_parser
->sel
.nkeys
;
691 uint32_t off
= actions
->type
== RTE_FLOW_ACTION_TYPE_SET_MAC_SRC
?
692 offsetof(struct ether_hdr
, s_addr
) :
693 offsetof(struct ether_hdr
, d_addr
);
694 const struct rte_flow_action_set_mac
*conf
=
695 (const struct rte_flow_action_set_mac
*)actions
->conf
;
697 p_parser
->keys
[idx
].off
= off
;
698 p_parser
->keys
[idx
].mask
= ~UINT32_MAX
;
699 p_parser
->keys_ex
[idx
].htype
= TCA_PEDIT_KEY_EX_HDR_TYPE_ETH
;
700 p_parser
->keys_ex
[idx
].cmd
= TCA_PEDIT_KEY_EX_CMD_SET
;
701 memcpy(&p_parser
->keys
[idx
].val
,
702 conf
->mac_addr
, SZ_PEDIT_KEY_VAL
);
704 p_parser
->keys
[idx
].off
= off
+ SZ_PEDIT_KEY_VAL
;
705 p_parser
->keys
[idx
].mask
= 0xFFFF0000;
706 p_parser
->keys_ex
[idx
].htype
= TCA_PEDIT_KEY_EX_HDR_TYPE_ETH
;
707 p_parser
->keys_ex
[idx
].cmd
= TCA_PEDIT_KEY_EX_CMD_SET
;
708 memcpy(&p_parser
->keys
[idx
].val
,
709 conf
->mac_addr
+ SZ_PEDIT_KEY_VAL
,
710 ETHER_ADDR_LEN
- SZ_PEDIT_KEY_VAL
);
711 p_parser
->sel
.nkeys
= (++idx
);
715 * Set pedit key of decrease/set ttl
718 * pointer to action specification
719 * @param[in,out] p_parser
720 * pointer to pedit_parser
721 * @param[in] item_flags
722 * flags of all items presented
725 flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action
*actions
,
726 struct pedit_parser
*p_parser
,
729 int idx
= p_parser
->sel
.nkeys
;
731 p_parser
->keys
[idx
].mask
= 0xFFFFFF00;
732 if (item_flags
& MLX5_FLOW_LAYER_OUTER_L3_IPV4
) {
733 p_parser
->keys_ex
[idx
].htype
= TCA_PEDIT_KEY_EX_HDR_TYPE_IP4
;
734 p_parser
->keys
[idx
].off
=
735 offsetof(struct ipv4_hdr
, time_to_live
);
737 if (item_flags
& MLX5_FLOW_LAYER_OUTER_L3_IPV6
) {
738 p_parser
->keys_ex
[idx
].htype
= TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
;
739 p_parser
->keys
[idx
].off
=
740 offsetof(struct ipv6_hdr
, hop_limits
);
742 if (actions
->type
== RTE_FLOW_ACTION_TYPE_DEC_TTL
) {
743 p_parser
->keys_ex
[idx
].cmd
= TCA_PEDIT_KEY_EX_CMD_ADD
;
744 p_parser
->keys
[idx
].val
= 0x000000FF;
746 p_parser
->keys_ex
[idx
].cmd
= TCA_PEDIT_KEY_EX_CMD_SET
;
747 p_parser
->keys
[idx
].val
=
748 (__u32
)((const struct rte_flow_action_set_ttl
*)
749 actions
->conf
)->ttl_value
;
751 p_parser
->sel
.nkeys
= (++idx
);
755 * Set pedit key of transport (TCP/UDP) port value
758 * pointer to action specification
759 * @param[in,out] p_parser
760 * pointer to pedit_parser
761 * @param[in] item_flags
762 * flags of all items presented
765 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action
*actions
,
766 struct pedit_parser
*p_parser
,
769 int idx
= p_parser
->sel
.nkeys
;
771 if (item_flags
& MLX5_FLOW_LAYER_OUTER_L4_UDP
)
772 p_parser
->keys_ex
[idx
].htype
= TCA_PEDIT_KEY_EX_HDR_TYPE_UDP
;
773 if (item_flags
& MLX5_FLOW_LAYER_OUTER_L4_TCP
)
774 p_parser
->keys_ex
[idx
].htype
= TCA_PEDIT_KEY_EX_HDR_TYPE_TCP
;
775 p_parser
->keys_ex
[idx
].cmd
= TCA_PEDIT_KEY_EX_CMD_SET
;
776 /* offset of src/dst port is same for TCP and UDP */
777 p_parser
->keys
[idx
].off
=
778 actions
->type
== RTE_FLOW_ACTION_TYPE_SET_TP_SRC
?
779 offsetof(struct tcp_hdr
, src_port
) :
780 offsetof(struct tcp_hdr
, dst_port
);
781 p_parser
->keys
[idx
].mask
= 0xFFFF0000;
782 p_parser
->keys
[idx
].val
=
783 (__u32
)((const struct rte_flow_action_set_tp
*)
784 actions
->conf
)->port
;
785 p_parser
->sel
.nkeys
= (++idx
);
789 * Set pedit key of ipv6 address
792 * pointer to action specification
793 * @param[in,out] p_parser
794 * pointer to pedit_parser
797 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action
*actions
,
798 struct pedit_parser
*p_parser
)
800 int idx
= p_parser
->sel
.nkeys
;
801 int keys
= NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN
);
803 actions
->type
== RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
?
804 offsetof(struct ipv6_hdr
, src_addr
) :
805 offsetof(struct ipv6_hdr
, dst_addr
);
806 const struct rte_flow_action_set_ipv6
*conf
=
807 (const struct rte_flow_action_set_ipv6
*)actions
->conf
;
809 for (int i
= 0; i
< keys
; i
++, idx
++) {
810 p_parser
->keys_ex
[idx
].htype
= TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
;
811 p_parser
->keys_ex
[idx
].cmd
= TCA_PEDIT_KEY_EX_CMD_SET
;
812 p_parser
->keys
[idx
].off
= off_base
+ i
* SZ_PEDIT_KEY_VAL
;
813 p_parser
->keys
[idx
].mask
= ~UINT32_MAX
;
814 memcpy(&p_parser
->keys
[idx
].val
,
815 conf
->ipv6_addr
+ i
* SZ_PEDIT_KEY_VAL
,
818 p_parser
->sel
.nkeys
+= keys
;
822 * Set pedit key of ipv4 address
825 * pointer to action specification
826 * @param[in,out] p_parser
827 * pointer to pedit_parser
830 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action
*actions
,
831 struct pedit_parser
*p_parser
)
833 int idx
= p_parser
->sel
.nkeys
;
835 p_parser
->keys_ex
[idx
].htype
= TCA_PEDIT_KEY_EX_HDR_TYPE_IP4
;
836 p_parser
->keys_ex
[idx
].cmd
= TCA_PEDIT_KEY_EX_CMD_SET
;
837 p_parser
->keys
[idx
].off
=
838 actions
->type
== RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
?
839 offsetof(struct ipv4_hdr
, src_addr
) :
840 offsetof(struct ipv4_hdr
, dst_addr
);
841 p_parser
->keys
[idx
].mask
= ~UINT32_MAX
;
842 p_parser
->keys
[idx
].val
=
843 ((const struct rte_flow_action_set_ipv4
*)
844 actions
->conf
)->ipv4_addr
;
845 p_parser
->sel
.nkeys
= (++idx
);
849 * Create the pedit's na attribute in netlink message
850 * on pre-allocate message buffer
853 * pointer to pre-allocated netlink message buffer
854 * @param[in,out] actions
855 * pointer to pointer of actions specification.
856 * @param[in,out] action_flags
857 * pointer to actions flags
858 * @param[in] item_flags
859 * flags of all item presented
862 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr
*nl
,
863 const struct rte_flow_action
**actions
,
866 struct pedit_parser p_parser
;
867 struct nlattr
*na_act_options
;
868 struct nlattr
*na_pedit_keys
;
870 memset(&p_parser
, 0, sizeof(p_parser
));
871 mnl_attr_put_strz(nl
, TCA_ACT_KIND
, "pedit");
872 na_act_options
= mnl_attr_nest_start(nl
, TCA_ACT_OPTIONS
);
873 /* all modify header actions should be in one tc-pedit action */
874 for (; (*actions
)->type
!= RTE_FLOW_ACTION_TYPE_END
; (*actions
)++) {
875 switch ((*actions
)->type
) {
876 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
:
877 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
:
878 flow_tcf_pedit_key_set_ipv4_addr(*actions
, &p_parser
);
880 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
:
881 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
:
882 flow_tcf_pedit_key_set_ipv6_addr(*actions
, &p_parser
);
884 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC
:
885 case RTE_FLOW_ACTION_TYPE_SET_TP_DST
:
886 flow_tcf_pedit_key_set_tp_port(*actions
,
887 &p_parser
, item_flags
);
889 case RTE_FLOW_ACTION_TYPE_SET_TTL
:
890 case RTE_FLOW_ACTION_TYPE_DEC_TTL
:
891 flow_tcf_pedit_key_set_dec_ttl(*actions
,
892 &p_parser
, item_flags
);
894 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC
:
895 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST
:
896 flow_tcf_pedit_key_set_mac(*actions
, &p_parser
);
899 goto pedit_mnl_msg_done
;
903 p_parser
.sel
.action
= TC_ACT_PIPE
;
904 mnl_attr_put(nl
, TCA_PEDIT_PARMS_EX
,
905 sizeof(p_parser
.sel
) +
906 p_parser
.sel
.nkeys
* sizeof(struct tc_pedit_key
),
909 mnl_attr_nest_start(nl
, TCA_PEDIT_KEYS_EX
| NLA_F_NESTED
);
910 for (int i
= 0; i
< p_parser
.sel
.nkeys
; i
++) {
911 struct nlattr
*na_pedit_key
=
912 mnl_attr_nest_start(nl
,
913 TCA_PEDIT_KEY_EX
| NLA_F_NESTED
);
914 mnl_attr_put_u16(nl
, TCA_PEDIT_KEY_EX_HTYPE
,
915 p_parser
.keys_ex
[i
].htype
);
916 mnl_attr_put_u16(nl
, TCA_PEDIT_KEY_EX_CMD
,
917 p_parser
.keys_ex
[i
].cmd
);
918 mnl_attr_nest_end(nl
, na_pedit_key
);
920 mnl_attr_nest_end(nl
, na_pedit_keys
);
921 mnl_attr_nest_end(nl
, na_act_options
);
926 * Calculate max memory size of one TC-pedit actions.
927 * One TC-pedit action can contain set of keys each defining
928 * a rewrite element (rte_flow action)
930 * @param[in,out] actions
931 * actions specification.
932 * @param[in,out] action_flags
934 * @param[in,out] size
937 * Max memory size of one TC-pedit action
940 flow_tcf_get_pedit_actions_size(const struct rte_flow_action
**actions
,
941 uint64_t *action_flags
)
947 pedit_size
+= SZ_NLATTR_NEST
+ /* na_act_index. */
948 SZ_NLATTR_STRZ_OF("pedit") +
949 SZ_NLATTR_NEST
; /* TCA_ACT_OPTIONS. */
950 for (; (*actions
)->type
!= RTE_FLOW_ACTION_TYPE_END
; (*actions
)++) {
951 switch ((*actions
)->type
) {
952 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
:
953 keys
+= NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN
);
954 flags
|= MLX5_FLOW_ACTION_SET_IPV4_SRC
;
956 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
:
957 keys
+= NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN
);
958 flags
|= MLX5_FLOW_ACTION_SET_IPV4_DST
;
960 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
:
961 keys
+= NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN
);
962 flags
|= MLX5_FLOW_ACTION_SET_IPV6_SRC
;
964 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
:
965 keys
+= NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN
);
966 flags
|= MLX5_FLOW_ACTION_SET_IPV6_DST
;
968 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC
:
969 /* TCP is as same as UDP */
970 keys
+= NUM_OF_PEDIT_KEYS(TP_PORT_LEN
);
971 flags
|= MLX5_FLOW_ACTION_SET_TP_SRC
;
973 case RTE_FLOW_ACTION_TYPE_SET_TP_DST
:
974 /* TCP is as same as UDP */
975 keys
+= NUM_OF_PEDIT_KEYS(TP_PORT_LEN
);
976 flags
|= MLX5_FLOW_ACTION_SET_TP_DST
;
978 case RTE_FLOW_ACTION_TYPE_SET_TTL
:
979 keys
+= NUM_OF_PEDIT_KEYS(TTL_LEN
);
980 flags
|= MLX5_FLOW_ACTION_SET_TTL
;
982 case RTE_FLOW_ACTION_TYPE_DEC_TTL
:
983 keys
+= NUM_OF_PEDIT_KEYS(TTL_LEN
);
984 flags
|= MLX5_FLOW_ACTION_DEC_TTL
;
986 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC
:
987 keys
+= NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN
);
988 flags
|= MLX5_FLOW_ACTION_SET_MAC_SRC
;
990 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST
:
991 keys
+= NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN
);
992 flags
|= MLX5_FLOW_ACTION_SET_MAC_DST
;
995 goto get_pedit_action_size_done
;
998 get_pedit_action_size_done
:
999 /* TCA_PEDIT_PARAMS_EX */
1001 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel
) +
1002 keys
* sizeof(struct tc_pedit_key
));
1003 pedit_size
+= SZ_NLATTR_NEST
; /* TCA_PEDIT_KEYS */
1004 pedit_size
+= keys
*
1005 /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
1006 (SZ_NLATTR_NEST
+ SZ_NLATTR_DATA_OF(2) +
1007 SZ_NLATTR_DATA_OF(2));
1008 (*action_flags
) |= flags
;
1014 * Retrieve mask for pattern item.
1016 * This function does basic sanity checks on a pattern item in order to
1017 * return the most appropriate mask for it.
1020 * Item specification.
1021 * @param[in] mask_default
1022 * Default mask for pattern item as specified by the flow API.
1023 * @param[in] mask_supported
1024 * Mask fields supported by the implementation.
1025 * @param[in] mask_empty
1026 * Empty mask to return when there is no specification.
1028 * Perform verbose error reporting if not NULL.
1031 * Either @p item->mask or one of the mask parameters on success, NULL
1032 * otherwise and rte_errno is set.
1035 flow_tcf_item_mask(const struct rte_flow_item
*item
, const void *mask_default
,
1036 const void *mask_supported
, const void *mask_empty
,
1037 size_t mask_size
, struct rte_flow_error
*error
)
1039 const uint8_t *mask
;
1042 /* item->last and item->mask cannot exist without item->spec. */
1043 if (!item
->spec
&& (item
->mask
|| item
->last
)) {
1044 rte_flow_error_set(error
, EINVAL
,
1045 RTE_FLOW_ERROR_TYPE_ITEM
, item
,
1046 "\"mask\" or \"last\" field provided without"
1047 " a corresponding \"spec\"");
1050 /* No spec, no mask, no problem. */
1053 mask
= item
->mask
? item
->mask
: mask_default
;
1056 * Single-pass check to make sure that:
1057 * - Mask is supported, no bits are set outside mask_supported.
1058 * - Both item->spec and item->last are included in mask.
1060 for (i
= 0; i
!= mask_size
; ++i
) {
1063 if ((mask
[i
] | ((const uint8_t *)mask_supported
)[i
]) !=
1064 ((const uint8_t *)mask_supported
)[i
]) {
1065 rte_flow_error_set(error
, ENOTSUP
,
1066 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1067 "unsupported field found"
1072 (((const uint8_t *)item
->spec
)[i
] & mask
[i
]) !=
1073 (((const uint8_t *)item
->last
)[i
] & mask
[i
])) {
1074 rte_flow_error_set(error
, EINVAL
,
1075 RTE_FLOW_ERROR_TYPE_ITEM_LAST
,
1077 "range between \"spec\" and \"last\""
1078 " not comprised in \"mask\"");
1086 * Build a conversion table between port ID and ifindex.
1089 * Pointer to Ethernet device.
1091 * Pointer to ptoi table.
1093 * Size of ptoi table provided.
1096 * Size of ptoi table filled.
1099 flow_tcf_build_ptoi_table(struct rte_eth_dev
*dev
, struct flow_tcf_ptoi
*ptoi
,
1102 unsigned int n
= mlx5_dev_to_port_id(dev
->device
, NULL
, 0);
1103 uint16_t port_id
[n
+ 1];
1105 unsigned int own
= 0;
1107 /* At least one port is needed when no switch domain is present. */
1110 port_id
[0] = dev
->data
->port_id
;
1112 n
= RTE_MIN(mlx5_dev_to_port_id(dev
->device
, port_id
, n
), n
);
1116 for (i
= 0; i
!= n
; ++i
) {
1117 struct rte_eth_dev_info dev_info
;
1119 rte_eth_dev_info_get(port_id
[i
], &dev_info
);
1120 if (port_id
[i
] == dev
->data
->port_id
)
1122 ptoi
[i
].port_id
= port_id
[i
];
1123 ptoi
[i
].ifindex
= dev_info
.if_index
;
1125 /* Ensure first entry of ptoi[] is the current device. */
1128 ptoi
[0] = ptoi
[own
];
1129 ptoi
[own
] = ptoi
[n
];
1131 /* An entry with zero ifindex terminates ptoi[]. */
1132 ptoi
[n
].port_id
= 0;
1133 ptoi
[n
].ifindex
= 0;
1138 * Verify the @p attr will be correctly understood by the E-switch.
1141 * Pointer to flow attributes
1143 * Pointer to error structure.
1146 * 0 on success, a negative errno value otherwise and rte_errno is set.
1149 flow_tcf_validate_attributes(const struct rte_flow_attr
*attr
,
1150 struct rte_flow_error
*error
)
1153 * Supported attributes: groups, some priorities and ingress only.
1154 * group is supported only if kernel supports chain. Don't care about
1155 * transfer as it is the caller's problem.
1157 if (attr
->group
> MLX5_TCF_GROUP_ID_MAX
)
1158 return rte_flow_error_set(error
, ENOTSUP
,
1159 RTE_FLOW_ERROR_TYPE_ATTR_GROUP
, attr
,
1160 "group ID larger than "
1161 RTE_STR(MLX5_TCF_GROUP_ID_MAX
)
1162 " isn't supported");
1163 else if (attr
->priority
> MLX5_TCF_GROUP_PRIORITY_MAX
)
1164 return rte_flow_error_set(error
, ENOTSUP
,
1165 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY
,
1167 "priority more than "
1168 RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX
)
1169 " is not supported");
1171 return rte_flow_error_set(error
, EINVAL
,
1172 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS
,
1173 attr
, "only ingress is supported");
1175 return rte_flow_error_set(error
, ENOTSUP
,
1176 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS
,
1177 attr
, "egress is not supported");
1182 * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_ETH item for E-Switch.
1183 * The routine checks the L2 fields to be used in encapsulation header.
1186 * Pointer to the item structure.
1188 * Pointer to the error structure.
1191 * 0 on success, a negative errno value otherwise and rte_errno is set.
1194 flow_tcf_validate_vxlan_encap_eth(const struct rte_flow_item
*item
,
1195 struct rte_flow_error
*error
)
1197 const struct rte_flow_item_eth
*spec
= item
->spec
;
1198 const struct rte_flow_item_eth
*mask
= item
->mask
;
1202 * Specification for L2 addresses can be empty
1203 * because these ones are optional and not
1204 * required directly by tc rule. Kernel tries
1205 * to resolve these ones on its own
1210 /* If mask is not specified use the default one. */
1211 mask
= &rte_flow_item_eth_mask
;
1213 if (memcmp(&mask
->dst
,
1214 &flow_tcf_mask_empty
.eth
.dst
,
1215 sizeof(flow_tcf_mask_empty
.eth
.dst
))) {
1216 if (memcmp(&mask
->dst
,
1217 &rte_flow_item_eth_mask
.dst
,
1218 sizeof(rte_flow_item_eth_mask
.dst
)))
1219 return rte_flow_error_set
1221 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1222 "no support for partial mask on"
1223 " \"eth.dst\" field");
1225 if (memcmp(&mask
->src
,
1226 &flow_tcf_mask_empty
.eth
.src
,
1227 sizeof(flow_tcf_mask_empty
.eth
.src
))) {
1228 if (memcmp(&mask
->src
,
1229 &rte_flow_item_eth_mask
.src
,
1230 sizeof(rte_flow_item_eth_mask
.src
)))
1231 return rte_flow_error_set
1233 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1234 "no support for partial mask on"
1235 " \"eth.src\" field");
1237 if (mask
->type
!= RTE_BE16(0x0000)) {
1238 if (mask
->type
!= RTE_BE16(0xffff))
1239 return rte_flow_error_set
1241 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1242 "no support for partial mask on"
1243 " \"eth.type\" field");
1245 "outer ethernet type field"
1246 " cannot be forced for vxlan"
1247 " encapsulation, parameter ignored");
1253 * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV4 item for E-Switch.
1254 * The routine checks the IPv4 fields to be used in encapsulation header.
1257 * Pointer to the item structure.
1259 * Pointer to the error structure.
1262 * 0 on success, a negative errno value otherwise and rte_errno is set.
1265 flow_tcf_validate_vxlan_encap_ipv4(const struct rte_flow_item
*item
,
1266 struct rte_flow_error
*error
)
1268 const struct rte_flow_item_ipv4
*spec
= item
->spec
;
1269 const struct rte_flow_item_ipv4
*mask
= item
->mask
;
1273 * Specification for IP addresses cannot be empty
1274 * because it is required by tunnel_key parameter.
1276 return rte_flow_error_set(error
, EINVAL
,
1277 RTE_FLOW_ERROR_TYPE_ITEM
, item
,
1278 "NULL outer ipv4 address"
1279 " specification for vxlan"
1283 mask
= &rte_flow_item_ipv4_mask
;
1284 if (mask
->hdr
.dst_addr
!= RTE_BE32(0x00000000)) {
1285 if (mask
->hdr
.dst_addr
!= RTE_BE32(0xffffffff))
1286 return rte_flow_error_set
1288 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1289 "no support for partial mask on"
1290 " \"ipv4.hdr.dst_addr\" field"
1291 " for vxlan encapsulation");
1292 /* More IPv4 address validations can be put here. */
1295 * Kernel uses the destination IP address to determine
1296 * the routing path and obtain the MAC destination
1297 * address, so IP destination address must be
1298 * specified in the tc rule.
1300 return rte_flow_error_set(error
, EINVAL
,
1301 RTE_FLOW_ERROR_TYPE_ITEM
, item
,
1302 "outer ipv4 destination address"
1303 " must be specified for"
1304 " vxlan encapsulation");
1306 if (mask
->hdr
.src_addr
!= RTE_BE32(0x00000000)) {
1307 if (mask
->hdr
.src_addr
!= RTE_BE32(0xffffffff))
1308 return rte_flow_error_set
1310 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1311 "no support for partial mask on"
1312 " \"ipv4.hdr.src_addr\" field"
1313 " for vxlan encapsulation");
1314 /* More IPv4 address validations can be put here. */
1317 * Kernel uses the source IP address to select the
1318 * interface for egress encapsulated traffic, so
1319 * it must be specified in the tc rule.
1321 return rte_flow_error_set(error
, EINVAL
,
1322 RTE_FLOW_ERROR_TYPE_ITEM
, item
,
1323 "outer ipv4 source address"
1324 " must be specified for"
1325 " vxlan encapsulation");
1327 if (mask
->hdr
.type_of_service
&&
1328 mask
->hdr
.type_of_service
!= 0xff)
1329 return rte_flow_error_set(error
, ENOTSUP
,
1330 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1331 "no support for partial mask on"
1332 " \"ipv4.hdr.type_of_service\" field"
1333 " for vxlan encapsulation");
1334 if (mask
->hdr
.time_to_live
&&
1335 mask
->hdr
.time_to_live
!= 0xff)
1336 return rte_flow_error_set(error
, ENOTSUP
,
1337 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1338 "no support for partial mask on"
1339 " \"ipv4.hdr.time_to_live\" field"
1340 " for vxlan encapsulation");
1345 * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV6 item for E-Switch.
1346 * The routine checks the IPv6 fields to be used in encapsulation header.
1349 * Pointer to the item structure.
1351 * Pointer to the error structure.
1354 * 0 on success, a negative errno value otherwise and rte_errno is set.
1357 flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item
*item
,
1358 struct rte_flow_error
*error
)
1360 const struct rte_flow_item_ipv6
*spec
= item
->spec
;
1361 const struct rte_flow_item_ipv6
*mask
= item
->mask
;
1366 * Specification for IP addresses cannot be empty
1367 * because it is required by tunnel_key parameter.
1369 return rte_flow_error_set(error
, EINVAL
,
1370 RTE_FLOW_ERROR_TYPE_ITEM
, item
,
1371 "NULL outer ipv6 address"
1372 " specification for"
1373 " vxlan encapsulation");
1376 mask
= &rte_flow_item_ipv6_mask
;
1377 if (memcmp(&mask
->hdr
.dst_addr
,
1378 &flow_tcf_mask_empty
.ipv6
.hdr
.dst_addr
,
1380 if (memcmp(&mask
->hdr
.dst_addr
,
1381 &rte_flow_item_ipv6_mask
.hdr
.dst_addr
,
1383 return rte_flow_error_set
1385 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1386 "no support for partial mask on"
1387 " \"ipv6.hdr.dst_addr\" field"
1388 " for vxlan encapsulation");
1389 /* More IPv6 address validations can be put here. */
1392 * Kernel uses the destination IP address to determine
1393 * the routing path and obtain the MAC destination
1394 * address (heigh or gate), so IP destination address
1395 * must be specified within the tc rule.
1397 return rte_flow_error_set(error
, EINVAL
,
1398 RTE_FLOW_ERROR_TYPE_ITEM
, item
,
1399 "outer ipv6 destination address"
1400 " must be specified for"
1401 " vxlan encapsulation");
1403 if (memcmp(&mask
->hdr
.src_addr
,
1404 &flow_tcf_mask_empty
.ipv6
.hdr
.src_addr
,
1406 if (memcmp(&mask
->hdr
.src_addr
,
1407 &rte_flow_item_ipv6_mask
.hdr
.src_addr
,
1409 return rte_flow_error_set
1411 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1412 "no support for partial mask on"
1413 " \"ipv6.hdr.src_addr\" field"
1414 " for vxlan encapsulation");
1415 /* More L3 address validation can be put here. */
1418 * Kernel uses the source IP address to select the
1419 * interface for egress encapsulated traffic, so
1420 * it must be specified in the tc rule.
1422 return rte_flow_error_set(error
, EINVAL
,
1423 RTE_FLOW_ERROR_TYPE_ITEM
, item
,
1424 "outer L3 source address"
1425 " must be specified for"
1426 " vxlan encapsulation");
1428 msk6
= (rte_be_to_cpu_32(mask
->hdr
.vtc_flow
) >>
1429 IPV6_HDR_TC_SHIFT
) & 0xff;
1430 if (msk6
&& msk6
!= 0xff)
1431 return rte_flow_error_set(error
, ENOTSUP
,
1432 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1433 "no support for partial mask on"
1434 " \"ipv6.hdr.vtc_flow.tos\" field"
1435 " for vxlan encapsulation");
1436 if (mask
->hdr
.hop_limits
&& mask
->hdr
.hop_limits
!= 0xff)
1437 return rte_flow_error_set(error
, ENOTSUP
,
1438 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1439 "no support for partial mask on"
1440 " \"ipv6.hdr.hop_limits\" field"
1441 " for vxlan encapsulation");
1446 * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_UDP item for E-Switch.
1447 * The routine checks the UDP fields to be used in encapsulation header.
1450 * Pointer to the item structure.
1452 * Pointer to the error structure.
1455 * 0 on success, a negative errno value otherwise and rte_errno is set.
1458 flow_tcf_validate_vxlan_encap_udp(const struct rte_flow_item
*item
,
1459 struct rte_flow_error
*error
)
1461 const struct rte_flow_item_udp
*spec
= item
->spec
;
1462 const struct rte_flow_item_udp
*mask
= item
->mask
;
1466 * Specification for UDP ports cannot be empty
1467 * because it is required by tunnel_key parameter.
1469 return rte_flow_error_set(error
, EINVAL
,
1470 RTE_FLOW_ERROR_TYPE_ITEM
, item
,
1471 "NULL UDP port specification "
1472 " for vxlan encapsulation");
1475 mask
= &rte_flow_item_udp_mask
;
1476 if (mask
->hdr
.dst_port
!= RTE_BE16(0x0000)) {
1477 if (mask
->hdr
.dst_port
!= RTE_BE16(0xffff))
1478 return rte_flow_error_set
1480 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1481 "no support for partial mask on"
1482 " \"udp.hdr.dst_port\" field"
1483 " for vxlan encapsulation");
1484 if (!spec
->hdr
.dst_port
)
1485 return rte_flow_error_set
1487 RTE_FLOW_ERROR_TYPE_ITEM
, item
,
1488 "outer UDP remote port cannot be"
1489 " 0 for vxlan encapsulation");
1491 return rte_flow_error_set(error
, EINVAL
,
1492 RTE_FLOW_ERROR_TYPE_ITEM
, item
,
1493 "outer UDP remote port"
1494 " must be specified for"
1495 " vxlan encapsulation");
1497 if (mask
->hdr
.src_port
!= RTE_BE16(0x0000)) {
1498 if (mask
->hdr
.src_port
!= RTE_BE16(0xffff))
1499 return rte_flow_error_set
1501 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1502 "no support for partial mask on"
1503 " \"udp.hdr.src_port\" field"
1504 " for vxlan encapsulation");
1506 "outer UDP source port cannot be"
1507 " forced for vxlan encapsulation,"
1508 " parameter ignored");
1514 * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_VXLAN item for E-Switch.
1515 * The routine checks the VNIP fields to be used in encapsulation header.
1518 * Pointer to the item structure.
1520 * Pointer to the error structure.
1523 * 0 on success, a negative errno value otherwise and rte_errno is set.
1526 flow_tcf_validate_vxlan_encap_vni(const struct rte_flow_item
*item
,
1527 struct rte_flow_error
*error
)
1529 const struct rte_flow_item_vxlan
*spec
= item
->spec
;
1530 const struct rte_flow_item_vxlan
*mask
= item
->mask
;
1533 /* Outer VNI is required by tunnel_key parameter. */
1534 return rte_flow_error_set(error
, EINVAL
,
1535 RTE_FLOW_ERROR_TYPE_ITEM
, item
,
1536 "NULL VNI specification"
1537 " for vxlan encapsulation");
1540 mask
= &rte_flow_item_vxlan_mask
;
1541 if (!mask
->vni
[0] && !mask
->vni
[1] && !mask
->vni
[2])
1542 return rte_flow_error_set(error
, EINVAL
,
1543 RTE_FLOW_ERROR_TYPE_ITEM
, item
,
1544 "outer VNI must be specified "
1545 "for vxlan encapsulation");
1546 if (mask
->vni
[0] != 0xff ||
1547 mask
->vni
[1] != 0xff ||
1548 mask
->vni
[2] != 0xff)
1549 return rte_flow_error_set(error
, ENOTSUP
,
1550 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1551 "no support for partial mask on"
1552 " \"vxlan.vni\" field");
1554 if (!spec
->vni
[0] && !spec
->vni
[1] && !spec
->vni
[2])
1555 return rte_flow_error_set(error
, EINVAL
,
1556 RTE_FLOW_ERROR_TYPE_ITEM
, item
,
1557 "vxlan vni cannot be 0");
1562 * Validate VXLAN_ENCAP action item list for E-Switch.
1563 * The routine checks items to be used in encapsulation header.
1566 * Pointer to the VXLAN_ENCAP action structure.
1568 * Pointer to the error structure.
1571 * 0 on success, a negative errno value otherwise and rte_errno is set.
1574 flow_tcf_validate_vxlan_encap(const struct rte_flow_action
*action
,
1575 struct rte_flow_error
*error
)
1577 const struct rte_flow_item
*items
;
1579 uint32_t item_flags
= 0;
1582 return rte_flow_error_set(error
, EINVAL
,
1583 RTE_FLOW_ERROR_TYPE_ACTION
, action
,
1584 "Missing vxlan tunnel"
1585 " action configuration");
1586 items
= ((const struct rte_flow_action_vxlan_encap
*)
1587 action
->conf
)->definition
;
1589 return rte_flow_error_set(error
, EINVAL
,
1590 RTE_FLOW_ERROR_TYPE_ACTION
, action
,
1591 "Missing vxlan tunnel"
1592 " encapsulation parameters");
1593 for (; items
->type
!= RTE_FLOW_ITEM_TYPE_END
; items
++) {
1594 switch (items
->type
) {
1595 case RTE_FLOW_ITEM_TYPE_VOID
:
1597 case RTE_FLOW_ITEM_TYPE_ETH
:
1598 ret
= mlx5_flow_validate_item_eth(items
, item_flags
,
1602 ret
= flow_tcf_validate_vxlan_encap_eth(items
, error
);
1605 item_flags
|= MLX5_FLOW_LAYER_OUTER_L2
;
1608 case RTE_FLOW_ITEM_TYPE_IPV4
:
1609 ret
= mlx5_flow_validate_item_ipv4
1611 &flow_tcf_mask_supported
.ipv4
, error
);
1614 ret
= flow_tcf_validate_vxlan_encap_ipv4(items
, error
);
1617 item_flags
|= MLX5_FLOW_LAYER_OUTER_L3_IPV4
;
1619 case RTE_FLOW_ITEM_TYPE_IPV6
:
1620 ret
= mlx5_flow_validate_item_ipv6
1622 &flow_tcf_mask_supported
.ipv6
, error
);
1625 ret
= flow_tcf_validate_vxlan_encap_ipv6(items
, error
);
1628 item_flags
|= MLX5_FLOW_LAYER_OUTER_L3_IPV6
;
1630 case RTE_FLOW_ITEM_TYPE_UDP
:
1631 ret
= mlx5_flow_validate_item_udp(items
, item_flags
,
1635 ret
= flow_tcf_validate_vxlan_encap_udp(items
, error
);
1638 item_flags
|= MLX5_FLOW_LAYER_OUTER_L4_UDP
;
1640 case RTE_FLOW_ITEM_TYPE_VXLAN
:
1641 ret
= mlx5_flow_validate_item_vxlan(items
,
1645 ret
= flow_tcf_validate_vxlan_encap_vni(items
, error
);
1648 item_flags
|= MLX5_FLOW_LAYER_VXLAN
;
1651 return rte_flow_error_set
1653 RTE_FLOW_ERROR_TYPE_ITEM
, items
,
1654 "vxlan encap item not supported");
1657 if (!(item_flags
& MLX5_FLOW_LAYER_OUTER_L3
))
1658 return rte_flow_error_set(error
, EINVAL
,
1659 RTE_FLOW_ERROR_TYPE_ACTION
, action
,
1660 "no outer IP layer found"
1661 " for vxlan encapsulation");
1662 if (!(item_flags
& MLX5_FLOW_LAYER_OUTER_L4_UDP
))
1663 return rte_flow_error_set(error
, EINVAL
,
1664 RTE_FLOW_ERROR_TYPE_ACTION
, action
,
1665 "no outer UDP layer found"
1666 " for vxlan encapsulation");
1667 if (!(item_flags
& MLX5_FLOW_LAYER_VXLAN
))
1668 return rte_flow_error_set(error
, EINVAL
,
1669 RTE_FLOW_ERROR_TYPE_ACTION
, action
,
1670 "no VXLAN VNI found"
1671 " for vxlan encapsulation");
1676 * Validate outer RTE_FLOW_ITEM_TYPE_UDP item if tunnel item
1677 * RTE_FLOW_ITEM_TYPE_VXLAN is present in item list.
1680 * Outer UDP layer item (if any, NULL otherwise).
1682 * Pointer to the error structure.
1685 * 0 on success, a negative errno value otherwise and rte_errno is set.
1688 flow_tcf_validate_vxlan_decap_udp(const struct rte_flow_item
*udp
,
1689 struct rte_flow_error
*error
)
1691 const struct rte_flow_item_udp
*spec
= udp
->spec
;
1692 const struct rte_flow_item_udp
*mask
= udp
->mask
;
1696 * Specification for UDP ports cannot be empty
1697 * because it is required as decap parameter.
1699 return rte_flow_error_set(error
, EINVAL
,
1700 RTE_FLOW_ERROR_TYPE_ITEM
, udp
,
1701 "NULL UDP port specification"
1702 " for VXLAN decapsulation");
1704 mask
= &rte_flow_item_udp_mask
;
1705 if (mask
->hdr
.dst_port
!= RTE_BE16(0x0000)) {
1706 if (mask
->hdr
.dst_port
!= RTE_BE16(0xffff))
1707 return rte_flow_error_set
1709 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1710 "no support for partial mask on"
1711 " \"udp.hdr.dst_port\" field");
1712 if (!spec
->hdr
.dst_port
)
1713 return rte_flow_error_set
1715 RTE_FLOW_ERROR_TYPE_ITEM
, udp
,
1716 "zero decap local UDP port");
1718 return rte_flow_error_set(error
, EINVAL
,
1719 RTE_FLOW_ERROR_TYPE_ITEM
, udp
,
1720 "outer UDP destination port must be "
1721 "specified for vxlan decapsulation");
1723 if (mask
->hdr
.src_port
!= RTE_BE16(0x0000)) {
1724 if (mask
->hdr
.src_port
!= RTE_BE16(0xffff))
1725 return rte_flow_error_set
1727 RTE_FLOW_ERROR_TYPE_ITEM_MASK
, mask
,
1728 "no support for partial mask on"
1729 " \"udp.hdr.src_port\" field");
1731 "outer UDP local port cannot be "
1732 "forced for VXLAN encapsulation, "
1733 "parameter ignored");
1739 * Validate flow for E-Switch.
1742 * Pointer to the priv structure.
1744 * Pointer to the flow attributes.
1746 * Pointer to the list of items.
1747 * @param[in] actions
1748 * Pointer to the list of actions.
1750 * Pointer to the error structure.
1753 * 0 on success, a negative errno value otherwise and rte_errno is set.
1756 flow_tcf_validate(struct rte_eth_dev
*dev
,
1757 const struct rte_flow_attr
*attr
,
1758 const struct rte_flow_item items
[],
1759 const struct rte_flow_action actions
[],
1760 struct rte_flow_error
*error
)
1763 const struct rte_flow_item_port_id
*port_id
;
1764 const struct rte_flow_item_eth
*eth
;
1765 const struct rte_flow_item_vlan
*vlan
;
1766 const struct rte_flow_item_ipv4
*ipv4
;
1767 const struct rte_flow_item_ipv6
*ipv6
;
1768 const struct rte_flow_item_tcp
*tcp
;
1769 const struct rte_flow_item_udp
*udp
;
1770 const struct rte_flow_item_vxlan
*vxlan
;
1773 const struct rte_flow_action_port_id
*port_id
;
1774 const struct rte_flow_action_jump
*jump
;
1775 const struct rte_flow_action_of_push_vlan
*of_push_vlan
;
1776 const struct rte_flow_action_of_set_vlan_vid
*
1778 const struct rte_flow_action_of_set_vlan_pcp
*
1780 const struct rte_flow_action_vxlan_encap
*vxlan_encap
;
1781 const struct rte_flow_action_set_ipv4
*set_ipv4
;
1782 const struct rte_flow_action_set_ipv6
*set_ipv6
;
1784 const struct rte_flow_item
*outer_udp
= NULL
;
1785 rte_be16_t inner_etype
= RTE_BE16(ETH_P_ALL
);
1786 rte_be16_t outer_etype
= RTE_BE16(ETH_P_ALL
);
1787 rte_be16_t vlan_etype
= RTE_BE16(ETH_P_ALL
);
1788 uint64_t item_flags
= 0;
1789 uint64_t action_flags
= 0;
1790 uint8_t next_protocol
= 0xff;
1791 unsigned int tcm_ifindex
= 0;
1792 uint8_t pedit_validated
= 0;
1793 struct flow_tcf_ptoi ptoi
[PTOI_TABLE_SZ_MAX(dev
)];
1794 struct rte_eth_dev
*port_id_dev
= NULL
;
1795 bool in_port_id_set
;
1798 claim_nonzero(flow_tcf_build_ptoi_table(dev
, ptoi
,
1799 PTOI_TABLE_SZ_MAX(dev
)));
1800 ret
= flow_tcf_validate_attributes(attr
, error
);
1803 for (; actions
->type
!= RTE_FLOW_ACTION_TYPE_END
; actions
++) {
1805 uint64_t current_action_flag
= 0;
1807 switch (actions
->type
) {
1808 case RTE_FLOW_ACTION_TYPE_VOID
:
1810 case RTE_FLOW_ACTION_TYPE_PORT_ID
:
1811 current_action_flag
= MLX5_FLOW_ACTION_PORT_ID
;
1814 conf
.port_id
= actions
->conf
;
1815 if (conf
.port_id
->original
)
1818 for (i
= 0; ptoi
[i
].ifindex
; ++i
)
1819 if (ptoi
[i
].port_id
== conf
.port_id
->id
)
1821 if (!ptoi
[i
].ifindex
)
1822 return rte_flow_error_set
1824 RTE_FLOW_ERROR_TYPE_ACTION_CONF
,
1826 "missing data to convert port ID to"
1828 port_id_dev
= &rte_eth_devices
[conf
.port_id
->id
];
1830 case RTE_FLOW_ACTION_TYPE_JUMP
:
1831 current_action_flag
= MLX5_FLOW_ACTION_JUMP
;
1834 conf
.jump
= actions
->conf
;
1835 if (attr
->group
>= conf
.jump
->group
)
1836 return rte_flow_error_set
1838 RTE_FLOW_ERROR_TYPE_ACTION
,
1840 "can jump only to a group forward");
1842 case RTE_FLOW_ACTION_TYPE_DROP
:
1843 current_action_flag
= MLX5_FLOW_ACTION_DROP
;
1845 case RTE_FLOW_ACTION_TYPE_COUNT
:
1847 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN
:
1848 current_action_flag
= MLX5_FLOW_ACTION_OF_POP_VLAN
;
1850 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN
: {
1851 rte_be16_t ethertype
;
1853 current_action_flag
= MLX5_FLOW_ACTION_OF_PUSH_VLAN
;
1856 conf
.of_push_vlan
= actions
->conf
;
1857 ethertype
= conf
.of_push_vlan
->ethertype
;
1858 if (ethertype
!= RTE_BE16(ETH_P_8021Q
) &&
1859 ethertype
!= RTE_BE16(ETH_P_8021AD
))
1860 return rte_flow_error_set
1862 RTE_FLOW_ERROR_TYPE_ACTION
, actions
,
1863 "vlan push TPID must be "
1864 "802.1Q or 802.1AD");
1867 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID
:
1868 if (!(action_flags
& MLX5_FLOW_ACTION_OF_PUSH_VLAN
))
1869 return rte_flow_error_set
1871 RTE_FLOW_ERROR_TYPE_ACTION
, actions
,
1872 "vlan modify is not supported,"
1873 " set action must follow push action");
1874 current_action_flag
= MLX5_FLOW_ACTION_OF_SET_VLAN_VID
;
1876 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP
:
1877 if (!(action_flags
& MLX5_FLOW_ACTION_OF_PUSH_VLAN
))
1878 return rte_flow_error_set
1880 RTE_FLOW_ERROR_TYPE_ACTION
, actions
,
1881 "vlan modify is not supported,"
1882 " set action must follow push action");
1883 current_action_flag
= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP
;
1885 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP
:
1886 current_action_flag
= MLX5_FLOW_ACTION_VXLAN_DECAP
;
1888 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP
:
1889 ret
= flow_tcf_validate_vxlan_encap(actions
, error
);
1892 current_action_flag
= MLX5_FLOW_ACTION_VXLAN_ENCAP
;
1894 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
:
1895 current_action_flag
= MLX5_FLOW_ACTION_SET_IPV4_SRC
;
1897 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
:
1898 current_action_flag
= MLX5_FLOW_ACTION_SET_IPV4_DST
;
1900 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
:
1901 current_action_flag
= MLX5_FLOW_ACTION_SET_IPV6_SRC
;
1903 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
:
1904 current_action_flag
= MLX5_FLOW_ACTION_SET_IPV6_DST
;
1906 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC
:
1907 current_action_flag
= MLX5_FLOW_ACTION_SET_TP_SRC
;
1909 case RTE_FLOW_ACTION_TYPE_SET_TP_DST
:
1910 current_action_flag
= MLX5_FLOW_ACTION_SET_TP_DST
;
1912 case RTE_FLOW_ACTION_TYPE_SET_TTL
:
1913 current_action_flag
= MLX5_FLOW_ACTION_SET_TTL
;
1915 case RTE_FLOW_ACTION_TYPE_DEC_TTL
:
1916 current_action_flag
= MLX5_FLOW_ACTION_DEC_TTL
;
1918 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC
:
1919 current_action_flag
= MLX5_FLOW_ACTION_SET_MAC_SRC
;
1921 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST
:
1922 current_action_flag
= MLX5_FLOW_ACTION_SET_MAC_DST
;
1925 return rte_flow_error_set(error
, ENOTSUP
,
1926 RTE_FLOW_ERROR_TYPE_ACTION
,
1928 "action not supported");
1930 if (current_action_flag
& MLX5_TCF_CONFIG_ACTIONS
) {
1932 return rte_flow_error_set
1934 RTE_FLOW_ERROR_TYPE_ACTION_CONF
,
1936 "action configuration not set");
1938 if ((current_action_flag
& MLX5_TCF_PEDIT_ACTIONS
) &&
1940 return rte_flow_error_set(error
, ENOTSUP
,
1941 RTE_FLOW_ERROR_TYPE_ACTION
,
1943 "set actions should be "
1944 "listed successively");
1945 if ((current_action_flag
& ~MLX5_TCF_PEDIT_ACTIONS
) &&
1946 (action_flags
& MLX5_TCF_PEDIT_ACTIONS
))
1947 pedit_validated
= 1;
1948 if ((current_action_flag
& MLX5_TCF_FATE_ACTIONS
) &&
1949 (action_flags
& MLX5_TCF_FATE_ACTIONS
))
1950 return rte_flow_error_set(error
, EINVAL
,
1951 RTE_FLOW_ERROR_TYPE_ACTION
,
1953 "can't have multiple fate"
1955 if ((current_action_flag
& MLX5_TCF_VXLAN_ACTIONS
) &&
1956 (action_flags
& MLX5_TCF_VXLAN_ACTIONS
))
1957 return rte_flow_error_set(error
, EINVAL
,
1958 RTE_FLOW_ERROR_TYPE_ACTION
,
1960 "can't have multiple vxlan"
1962 if ((current_action_flag
& MLX5_TCF_VXLAN_ACTIONS
) &&
1963 (action_flags
& MLX5_TCF_VLAN_ACTIONS
))
1964 return rte_flow_error_set(error
, ENOTSUP
,
1965 RTE_FLOW_ERROR_TYPE_ACTION
,
1967 "can't have vxlan and vlan"
1968 " actions in the same rule");
1969 action_flags
|= current_action_flag
;
1971 for (; items
->type
!= RTE_FLOW_ITEM_TYPE_END
; items
++) {
1974 switch (items
->type
) {
1975 case RTE_FLOW_ITEM_TYPE_VOID
:
1977 case RTE_FLOW_ITEM_TYPE_PORT_ID
:
1978 if (item_flags
& MLX5_FLOW_LAYER_TUNNEL
)
1979 return rte_flow_error_set
1981 RTE_FLOW_ERROR_TYPE_ITEM
, items
,
1982 "inner tunnel port id"
1983 " item is not supported");
1984 mask
.port_id
= flow_tcf_item_mask
1985 (items
, &rte_flow_item_port_id_mask
,
1986 &flow_tcf_mask_supported
.port_id
,
1987 &flow_tcf_mask_empty
.port_id
,
1988 sizeof(flow_tcf_mask_supported
.port_id
),
1992 if (mask
.port_id
== &flow_tcf_mask_empty
.port_id
) {
1996 spec
.port_id
= items
->spec
;
1997 if (mask
.port_id
->id
&& mask
.port_id
->id
!= 0xffffffff)
1998 return rte_flow_error_set
2000 RTE_FLOW_ERROR_TYPE_ITEM_MASK
,
2002 "no support for partial mask on"
2004 if (!mask
.port_id
->id
)
2007 for (i
= 0; ptoi
[i
].ifindex
; ++i
)
2008 if (ptoi
[i
].port_id
== spec
.port_id
->id
)
2010 if (!ptoi
[i
].ifindex
)
2011 return rte_flow_error_set
2013 RTE_FLOW_ERROR_TYPE_ITEM_SPEC
,
2015 "missing data to convert port ID to"
2017 if (in_port_id_set
&& ptoi
[i
].ifindex
!= tcm_ifindex
)
2018 return rte_flow_error_set
2020 RTE_FLOW_ERROR_TYPE_ITEM_SPEC
,
2022 "cannot match traffic for"
2023 " several port IDs through"
2024 " a single flow rule");
2025 tcm_ifindex
= ptoi
[i
].ifindex
;
2028 case RTE_FLOW_ITEM_TYPE_ETH
:
2029 ret
= mlx5_flow_validate_item_eth(items
, item_flags
,
2033 item_flags
|= (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) ?
2034 MLX5_FLOW_LAYER_INNER_L2
:
2035 MLX5_FLOW_LAYER_OUTER_L2
;
2037 * Redundant check due to different supported mask.
2038 * Same for the rest of items.
2040 mask
.eth
= flow_tcf_item_mask
2041 (items
, &rte_flow_item_eth_mask
,
2042 &flow_tcf_mask_supported
.eth
,
2043 &flow_tcf_mask_empty
.eth
,
2044 sizeof(flow_tcf_mask_supported
.eth
),
2048 if (mask
.eth
->type
&& mask
.eth
->type
!=
2050 return rte_flow_error_set
2052 RTE_FLOW_ERROR_TYPE_ITEM_MASK
,
2054 "no support for partial mask on"
2056 assert(items
->spec
);
2057 spec
.eth
= items
->spec
;
2058 if (mask
.eth
->type
&&
2059 (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) &&
2060 inner_etype
!= RTE_BE16(ETH_P_ALL
) &&
2061 inner_etype
!= spec
.eth
->type
)
2062 return rte_flow_error_set
2064 RTE_FLOW_ERROR_TYPE_ITEM
,
2066 "inner eth_type conflict");
2067 if (mask
.eth
->type
&&
2068 !(item_flags
& MLX5_FLOW_LAYER_TUNNEL
) &&
2069 outer_etype
!= RTE_BE16(ETH_P_ALL
) &&
2070 outer_etype
!= spec
.eth
->type
)
2071 return rte_flow_error_set
2073 RTE_FLOW_ERROR_TYPE_ITEM
,
2075 "outer eth_type conflict");
2076 if (mask
.eth
->type
) {
2077 if (item_flags
& MLX5_FLOW_LAYER_TUNNEL
)
2078 inner_etype
= spec
.eth
->type
;
2080 outer_etype
= spec
.eth
->type
;
2083 case RTE_FLOW_ITEM_TYPE_VLAN
:
2084 if (item_flags
& MLX5_FLOW_LAYER_TUNNEL
)
2085 return rte_flow_error_set
2087 RTE_FLOW_ERROR_TYPE_ITEM
, items
,
2089 " is not supported");
2090 ret
= mlx5_flow_validate_item_vlan(items
, item_flags
,
2094 item_flags
|= MLX5_FLOW_LAYER_OUTER_VLAN
;
2095 mask
.vlan
= flow_tcf_item_mask
2096 (items
, &rte_flow_item_vlan_mask
,
2097 &flow_tcf_mask_supported
.vlan
,
2098 &flow_tcf_mask_empty
.vlan
,
2099 sizeof(flow_tcf_mask_supported
.vlan
),
2103 if ((mask
.vlan
->tci
& RTE_BE16(0xe000) &&
2104 (mask
.vlan
->tci
& RTE_BE16(0xe000)) !=
2105 RTE_BE16(0xe000)) ||
2106 (mask
.vlan
->tci
& RTE_BE16(0x0fff) &&
2107 (mask
.vlan
->tci
& RTE_BE16(0x0fff)) !=
2108 RTE_BE16(0x0fff)) ||
2109 (mask
.vlan
->inner_type
&&
2110 mask
.vlan
->inner_type
!= RTE_BE16(0xffff)))
2111 return rte_flow_error_set
2113 RTE_FLOW_ERROR_TYPE_ITEM_MASK
,
2115 "no support for partial masks on"
2116 " \"tci\" (PCP and VID parts) and"
2117 " \"inner_type\" fields");
2118 if (outer_etype
!= RTE_BE16(ETH_P_ALL
) &&
2119 outer_etype
!= RTE_BE16(ETH_P_8021Q
))
2120 return rte_flow_error_set
2122 RTE_FLOW_ERROR_TYPE_ITEM
,
2124 "outer eth_type conflict,"
2126 outer_etype
= RTE_BE16(ETH_P_8021Q
);
2127 assert(items
->spec
);
2128 spec
.vlan
= items
->spec
;
2129 if (mask
.vlan
->inner_type
&&
2130 vlan_etype
!= RTE_BE16(ETH_P_ALL
) &&
2131 vlan_etype
!= spec
.vlan
->inner_type
)
2132 return rte_flow_error_set
2134 RTE_FLOW_ERROR_TYPE_ITEM
,
2136 "vlan eth_type conflict");
2137 if (mask
.vlan
->inner_type
)
2138 vlan_etype
= spec
.vlan
->inner_type
;
2140 case RTE_FLOW_ITEM_TYPE_IPV4
:
2141 ret
= mlx5_flow_validate_item_ipv4
2143 &flow_tcf_mask_supported
.ipv4
, error
);
2146 item_flags
|= (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) ?
2147 MLX5_FLOW_LAYER_INNER_L3_IPV4
:
2148 MLX5_FLOW_LAYER_OUTER_L3_IPV4
;
2149 mask
.ipv4
= flow_tcf_item_mask
2150 (items
, &rte_flow_item_ipv4_mask
,
2151 &flow_tcf_mask_supported
.ipv4
,
2152 &flow_tcf_mask_empty
.ipv4
,
2153 sizeof(flow_tcf_mask_supported
.ipv4
),
2157 if (mask
.ipv4
->hdr
.next_proto_id
&&
2158 mask
.ipv4
->hdr
.next_proto_id
!= 0xff)
2159 return rte_flow_error_set
2161 RTE_FLOW_ERROR_TYPE_ITEM_MASK
,
2163 "no support for partial mask on"
2164 " \"hdr.next_proto_id\" field");
2165 else if (mask
.ipv4
->hdr
.next_proto_id
)
2167 ((const struct rte_flow_item_ipv4
*)
2168 (items
->spec
))->hdr
.next_proto_id
;
2169 if (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) {
2170 if (inner_etype
!= RTE_BE16(ETH_P_ALL
) &&
2171 inner_etype
!= RTE_BE16(ETH_P_IP
))
2172 return rte_flow_error_set
2174 RTE_FLOW_ERROR_TYPE_ITEM
,
2176 "inner eth_type conflict,"
2177 " IPv4 is required");
2178 inner_etype
= RTE_BE16(ETH_P_IP
);
2179 } else if (item_flags
& MLX5_FLOW_LAYER_OUTER_VLAN
) {
2180 if (vlan_etype
!= RTE_BE16(ETH_P_ALL
) &&
2181 vlan_etype
!= RTE_BE16(ETH_P_IP
))
2182 return rte_flow_error_set
2184 RTE_FLOW_ERROR_TYPE_ITEM
,
2186 "vlan eth_type conflict,"
2187 " IPv4 is required");
2188 vlan_etype
= RTE_BE16(ETH_P_IP
);
2190 if (outer_etype
!= RTE_BE16(ETH_P_ALL
) &&
2191 outer_etype
!= RTE_BE16(ETH_P_IP
))
2192 return rte_flow_error_set
2194 RTE_FLOW_ERROR_TYPE_ITEM
,
2196 "eth_type conflict,"
2197 " IPv4 is required");
2198 outer_etype
= RTE_BE16(ETH_P_IP
);
2201 case RTE_FLOW_ITEM_TYPE_IPV6
:
2202 ret
= mlx5_flow_validate_item_ipv6
2204 &flow_tcf_mask_supported
.ipv6
, error
);
2207 item_flags
|= (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) ?
2208 MLX5_FLOW_LAYER_INNER_L3_IPV6
:
2209 MLX5_FLOW_LAYER_OUTER_L3_IPV6
;
2210 mask
.ipv6
= flow_tcf_item_mask
2211 (items
, &rte_flow_item_ipv6_mask
,
2212 &flow_tcf_mask_supported
.ipv6
,
2213 &flow_tcf_mask_empty
.ipv6
,
2214 sizeof(flow_tcf_mask_supported
.ipv6
),
2218 if (mask
.ipv6
->hdr
.proto
&&
2219 mask
.ipv6
->hdr
.proto
!= 0xff)
2220 return rte_flow_error_set
2222 RTE_FLOW_ERROR_TYPE_ITEM_MASK
,
2224 "no support for partial mask on"
2225 " \"hdr.proto\" field");
2226 else if (mask
.ipv6
->hdr
.proto
)
2228 ((const struct rte_flow_item_ipv6
*)
2229 (items
->spec
))->hdr
.proto
;
2230 if (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) {
2231 if (inner_etype
!= RTE_BE16(ETH_P_ALL
) &&
2232 inner_etype
!= RTE_BE16(ETH_P_IPV6
))
2233 return rte_flow_error_set
2235 RTE_FLOW_ERROR_TYPE_ITEM
,
2237 "inner eth_type conflict,"
2238 " IPv6 is required");
2239 inner_etype
= RTE_BE16(ETH_P_IPV6
);
2240 } else if (item_flags
& MLX5_FLOW_LAYER_OUTER_VLAN
) {
2241 if (vlan_etype
!= RTE_BE16(ETH_P_ALL
) &&
2242 vlan_etype
!= RTE_BE16(ETH_P_IPV6
))
2243 return rte_flow_error_set
2245 RTE_FLOW_ERROR_TYPE_ITEM
,
2247 "vlan eth_type conflict,"
2248 " IPv6 is required");
2249 vlan_etype
= RTE_BE16(ETH_P_IPV6
);
2251 if (outer_etype
!= RTE_BE16(ETH_P_ALL
) &&
2252 outer_etype
!= RTE_BE16(ETH_P_IPV6
))
2253 return rte_flow_error_set
2255 RTE_FLOW_ERROR_TYPE_ITEM
,
2257 "eth_type conflict,"
2258 " IPv6 is required");
2259 outer_etype
= RTE_BE16(ETH_P_IPV6
);
2262 case RTE_FLOW_ITEM_TYPE_UDP
:
2263 ret
= mlx5_flow_validate_item_udp(items
, item_flags
,
2264 next_protocol
, error
);
2267 item_flags
|= (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) ?
2268 MLX5_FLOW_LAYER_INNER_L4_UDP
:
2269 MLX5_FLOW_LAYER_OUTER_L4_UDP
;
2270 mask
.udp
= flow_tcf_item_mask
2271 (items
, &rte_flow_item_udp_mask
,
2272 &flow_tcf_mask_supported
.udp
,
2273 &flow_tcf_mask_empty
.udp
,
2274 sizeof(flow_tcf_mask_supported
.udp
),
2279 * Save the presumed outer UDP item for extra check
2280 * if the tunnel item will be found later in the list.
2282 if (!(item_flags
& MLX5_FLOW_LAYER_TUNNEL
))
2285 case RTE_FLOW_ITEM_TYPE_TCP
:
2286 ret
= mlx5_flow_validate_item_tcp
2289 &flow_tcf_mask_supported
.tcp
,
2293 item_flags
|= (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) ?
2294 MLX5_FLOW_LAYER_INNER_L4_TCP
:
2295 MLX5_FLOW_LAYER_OUTER_L4_TCP
;
2296 mask
.tcp
= flow_tcf_item_mask
2297 (items
, &rte_flow_item_tcp_mask
,
2298 &flow_tcf_mask_supported
.tcp
,
2299 &flow_tcf_mask_empty
.tcp
,
2300 sizeof(flow_tcf_mask_supported
.tcp
),
2305 case RTE_FLOW_ITEM_TYPE_VXLAN
:
2306 if (item_flags
& MLX5_FLOW_LAYER_OUTER_VLAN
)
2307 return rte_flow_error_set
2309 RTE_FLOW_ERROR_TYPE_ITEM
, items
,
2310 "vxlan tunnel over vlan"
2311 " is not supported");
2312 ret
= mlx5_flow_validate_item_vxlan(items
,
2316 item_flags
|= MLX5_FLOW_LAYER_VXLAN
;
2317 mask
.vxlan
= flow_tcf_item_mask
2318 (items
, &rte_flow_item_vxlan_mask
,
2319 &flow_tcf_mask_supported
.vxlan
,
2320 &flow_tcf_mask_empty
.vxlan
,
2321 sizeof(flow_tcf_mask_supported
.vxlan
), error
);
2324 if (mask
.vxlan
->vni
[0] != 0xff ||
2325 mask
.vxlan
->vni
[1] != 0xff ||
2326 mask
.vxlan
->vni
[2] != 0xff)
2327 return rte_flow_error_set
2329 RTE_FLOW_ERROR_TYPE_ITEM_MASK
,
2331 "no support for partial or "
2332 "empty mask on \"vxlan.vni\" field");
2334 * The VNI item assumes the VXLAN tunnel, it requires
2335 * at least the outer destination UDP port must be
2336 * specified without wildcards to allow kernel select
2337 * the virtual VXLAN device by port. Also outer IPv4
2338 * or IPv6 item must be specified (wilcards or even
2339 * zero mask are allowed) to let driver know the tunnel
2340 * IP version and process UDP traffic correctly.
2343 (MLX5_FLOW_LAYER_OUTER_L3_IPV4
|
2344 MLX5_FLOW_LAYER_OUTER_L3_IPV6
)))
2345 return rte_flow_error_set
2347 RTE_FLOW_ERROR_TYPE_ACTION
,
2349 "no outer IP pattern found"
2350 " for vxlan tunnel");
2351 if (!(item_flags
& MLX5_FLOW_LAYER_OUTER_L4_UDP
))
2352 return rte_flow_error_set
2354 RTE_FLOW_ERROR_TYPE_ACTION
,
2356 "no outer UDP pattern found"
2357 " for vxlan tunnel");
2359 * All items preceding the tunnel item become outer
2360 * ones and we should do extra validation for them
2361 * due to tc limitations for tunnel outer parameters.
2362 * Currently only outer UDP item requres extra check,
2363 * use the saved pointer instead of item list rescan.
2366 ret
= flow_tcf_validate_vxlan_decap_udp
2370 /* Reset L4 protocol for inner parameters. */
2371 next_protocol
= 0xff;
2374 return rte_flow_error_set(error
, ENOTSUP
,
2375 RTE_FLOW_ERROR_TYPE_ITEM
,
2376 items
, "item not supported");
2379 if ((action_flags
& MLX5_TCF_PEDIT_ACTIONS
) &&
2380 (action_flags
& MLX5_FLOW_ACTION_DROP
))
2381 return rte_flow_error_set(error
, ENOTSUP
,
2382 RTE_FLOW_ERROR_TYPE_ACTION
,
2384 "set action is not compatible with "
2386 if ((action_flags
& MLX5_TCF_PEDIT_ACTIONS
) &&
2387 !(action_flags
& MLX5_FLOW_ACTION_PORT_ID
))
2388 return rte_flow_error_set(error
, ENOTSUP
,
2389 RTE_FLOW_ERROR_TYPE_ACTION
,
2391 "set action must be followed by "
2394 (MLX5_FLOW_ACTION_SET_IPV4_SRC
| MLX5_FLOW_ACTION_SET_IPV4_DST
)) {
2395 if (!(item_flags
& MLX5_FLOW_LAYER_OUTER_L3_IPV4
))
2396 return rte_flow_error_set(error
, EINVAL
,
2397 RTE_FLOW_ERROR_TYPE_ACTION
,
2399 "no ipv4 item found in"
2403 (MLX5_FLOW_ACTION_SET_IPV6_SRC
| MLX5_FLOW_ACTION_SET_IPV6_DST
)) {
2404 if (!(item_flags
& MLX5_FLOW_LAYER_OUTER_L3_IPV6
))
2405 return rte_flow_error_set(error
, EINVAL
,
2406 RTE_FLOW_ERROR_TYPE_ACTION
,
2408 "no ipv6 item found in"
2412 (MLX5_FLOW_ACTION_SET_TP_SRC
| MLX5_FLOW_ACTION_SET_TP_DST
)) {
2414 (MLX5_FLOW_LAYER_OUTER_L4_UDP
|
2415 MLX5_FLOW_LAYER_OUTER_L4_TCP
)))
2416 return rte_flow_error_set(error
, EINVAL
,
2417 RTE_FLOW_ERROR_TYPE_ACTION
,
2419 "no TCP/UDP item found in"
2423 * FW syndrome (0xA9C090):
2424 * set_flow_table_entry: push vlan action fte in fdb can ONLY be
2425 * forward to the uplink.
2427 if ((action_flags
& MLX5_FLOW_ACTION_OF_PUSH_VLAN
) &&
2428 (action_flags
& MLX5_FLOW_ACTION_PORT_ID
) &&
2429 ((struct mlx5_priv
*)port_id_dev
->data
->dev_private
)->representor
)
2430 return rte_flow_error_set(error
, ENOTSUP
,
2431 RTE_FLOW_ERROR_TYPE_ACTION
, actions
,
2432 "vlan push can only be applied"
2433 " when forwarding to uplink port");
2435 * FW syndrome (0x294609):
2436 * set_flow_table_entry: modify/pop/push actions in fdb flow table
2437 * are supported only while forwarding to vport.
2439 if ((action_flags
& MLX5_TCF_VLAN_ACTIONS
) &&
2440 !(action_flags
& MLX5_FLOW_ACTION_PORT_ID
))
2441 return rte_flow_error_set(error
, ENOTSUP
,
2442 RTE_FLOW_ERROR_TYPE_ACTION
, actions
,
2443 "vlan actions are supported"
2444 " only with port_id action");
2445 if ((action_flags
& MLX5_TCF_VXLAN_ACTIONS
) &&
2446 !(action_flags
& MLX5_FLOW_ACTION_PORT_ID
))
2447 return rte_flow_error_set(error
, ENOTSUP
,
2448 RTE_FLOW_ERROR_TYPE_ACTION
, NULL
,
2449 "vxlan actions are supported"
2450 " only with port_id action");
2451 if (!(action_flags
& MLX5_TCF_FATE_ACTIONS
))
2452 return rte_flow_error_set(error
, EINVAL
,
2453 RTE_FLOW_ERROR_TYPE_ACTION
, actions
,
2454 "no fate action is found");
2456 (MLX5_FLOW_ACTION_SET_TTL
| MLX5_FLOW_ACTION_DEC_TTL
)) {
2458 (MLX5_FLOW_LAYER_OUTER_L3_IPV4
|
2459 MLX5_FLOW_LAYER_OUTER_L3_IPV6
)))
2460 return rte_flow_error_set(error
, EINVAL
,
2461 RTE_FLOW_ERROR_TYPE_ACTION
,
2463 "no IP found in pattern");
2466 (MLX5_FLOW_ACTION_SET_MAC_SRC
| MLX5_FLOW_ACTION_SET_MAC_DST
)) {
2467 if (!(item_flags
& MLX5_FLOW_LAYER_OUTER_L2
))
2468 return rte_flow_error_set(error
, ENOTSUP
,
2469 RTE_FLOW_ERROR_TYPE_ACTION
,
2471 "no ethernet found in"
2474 if ((action_flags
& MLX5_FLOW_ACTION_VXLAN_DECAP
) &&
2475 !(item_flags
& MLX5_FLOW_LAYER_VXLAN
))
2476 return rte_flow_error_set(error
, EINVAL
,
2477 RTE_FLOW_ERROR_TYPE_ACTION
,
2479 "no VNI pattern found"
2480 " for vxlan decap action");
2481 if ((action_flags
& MLX5_FLOW_ACTION_VXLAN_ENCAP
) &&
2482 (item_flags
& MLX5_FLOW_LAYER_TUNNEL
))
2483 return rte_flow_error_set(error
, EINVAL
,
2484 RTE_FLOW_ERROR_TYPE_ACTION
,
2486 "vxlan encap not supported"
2487 " for tunneled traffic");
2492 * Calculate maximum size of memory for flow items of Linux TC flower.
2495 * Pointer to the flow attributes.
2497 * Pointer to the list of items.
2498 * @param[out] action_flags
2499 * Pointer to the detected actions.
2502 * Maximum size of memory for items.
2505 flow_tcf_get_items_size(const struct rte_flow_attr
*attr
,
2506 const struct rte_flow_item items
[],
2507 uint64_t *action_flags
)
2511 size
+= SZ_NLATTR_STRZ_OF("flower") +
2512 SZ_NLATTR_TYPE_OF(uint16_t) + /* Outer ether type. */
2513 SZ_NLATTR_NEST
+ /* TCA_OPTIONS. */
2514 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
2515 if (attr
->group
> 0)
2516 size
+= SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
2517 for (; items
->type
!= RTE_FLOW_ITEM_TYPE_END
; items
++) {
2518 switch (items
->type
) {
2519 case RTE_FLOW_ITEM_TYPE_VOID
:
2521 case RTE_FLOW_ITEM_TYPE_PORT_ID
:
2523 case RTE_FLOW_ITEM_TYPE_ETH
:
2524 size
+= SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN
) * 4;
2525 /* dst/src MAC addr and mask. */
2527 case RTE_FLOW_ITEM_TYPE_VLAN
:
2528 size
+= SZ_NLATTR_TYPE_OF(uint16_t) +
2529 /* VLAN Ether type. */
2530 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
2531 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
2533 case RTE_FLOW_ITEM_TYPE_IPV4
: {
2534 const struct rte_flow_item_ipv4
*ipv4
= items
->mask
;
2536 size
+= SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
2537 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
2538 /* dst/src IP addr and mask. */
2539 if (ipv4
&& ipv4
->hdr
.time_to_live
)
2540 size
+= SZ_NLATTR_TYPE_OF(uint8_t) * 2;
2541 if (ipv4
&& ipv4
->hdr
.type_of_service
)
2542 size
+= SZ_NLATTR_TYPE_OF(uint8_t) * 2;
2545 case RTE_FLOW_ITEM_TYPE_IPV6
: {
2546 const struct rte_flow_item_ipv6
*ipv6
= items
->mask
;
2548 size
+= SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
2549 SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN
) * 4;
2550 /* dst/src IP addr and mask. */
2551 if (ipv6
&& ipv6
->hdr
.hop_limits
)
2552 size
+= SZ_NLATTR_TYPE_OF(uint8_t) * 2;
2553 if (ipv6
&& (rte_be_to_cpu_32(ipv6
->hdr
.vtc_flow
) &
2554 (0xfful
<< IPV6_HDR_TC_SHIFT
)))
2555 size
+= SZ_NLATTR_TYPE_OF(uint8_t) * 2;
2558 case RTE_FLOW_ITEM_TYPE_UDP
:
2559 size
+= SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
2560 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
2561 /* dst/src port and mask. */
2563 case RTE_FLOW_ITEM_TYPE_TCP
:
2564 size
+= SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
2565 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
2566 /* dst/src port and mask. */
2568 case RTE_FLOW_ITEM_TYPE_VXLAN
:
2569 size
+= SZ_NLATTR_TYPE_OF(uint32_t);
2571 * There might be no VXLAN decap action in the action
2572 * list, nonetheless the VXLAN tunnel flow requires
2573 * the decap structure to be correctly applied to
2574 * VXLAN device, set the flag to create the structure.
2575 * Translation routine will not put the decap action
2576 * in tne Netlink message if there is no actual action
2579 *action_flags
|= MLX5_FLOW_ACTION_VXLAN_DECAP
;
2583 "unsupported item %p type %d,"
2584 " items must be validated before flow creation",
2585 (const void *)items
, items
->type
);
2593 * Calculate size of memory to store the VXLAN encapsultion
2594 * related items in the Netlink message buffer. Items list
2595 * is specified by RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action.
2596 * The item list should be validated.
2599 * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action object.
2600 * List of pattern items to scan data from.
2603 * The size the part of Netlink message buffer to store the
2604 * VXLAN encapsulation item attributes.
2607 flow_tcf_vxlan_encap_size(const struct rte_flow_action
*action
)
2609 const struct rte_flow_item
*items
;
2612 assert(action
->type
== RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP
);
2613 assert(action
->conf
);
2615 items
= ((const struct rte_flow_action_vxlan_encap
*)
2616 action
->conf
)->definition
;
2618 for (; items
->type
!= RTE_FLOW_ITEM_TYPE_END
; items
++) {
2619 switch (items
->type
) {
2620 case RTE_FLOW_ITEM_TYPE_VOID
:
2622 case RTE_FLOW_ITEM_TYPE_ETH
:
2623 /* This item does not require message buffer. */
2625 case RTE_FLOW_ITEM_TYPE_IPV4
: {
2626 const struct rte_flow_item_ipv4
*ipv4
= items
->mask
;
2628 size
+= SZ_NLATTR_DATA_OF(IPV4_ADDR_LEN
) * 2;
2629 if (ipv4
&& ipv4
->hdr
.time_to_live
)
2630 size
+= SZ_NLATTR_TYPE_OF(uint8_t) * 2;
2631 if (ipv4
&& ipv4
->hdr
.type_of_service
)
2632 size
+= SZ_NLATTR_TYPE_OF(uint8_t) * 2;
2635 case RTE_FLOW_ITEM_TYPE_IPV6
: {
2636 const struct rte_flow_item_ipv6
*ipv6
= items
->mask
;
2638 size
+= SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN
) * 2;
2639 if (ipv6
&& ipv6
->hdr
.hop_limits
)
2640 size
+= SZ_NLATTR_TYPE_OF(uint8_t) * 2;
2641 if (ipv6
&& (rte_be_to_cpu_32(ipv6
->hdr
.vtc_flow
) &
2642 (0xfful
<< IPV6_HDR_TC_SHIFT
)))
2643 size
+= SZ_NLATTR_TYPE_OF(uint8_t) * 2;
2646 case RTE_FLOW_ITEM_TYPE_UDP
: {
2647 const struct rte_flow_item_udp
*udp
= items
->mask
;
2649 size
+= SZ_NLATTR_TYPE_OF(uint16_t);
2650 if (!udp
|| udp
->hdr
.src_port
!= RTE_BE16(0x0000))
2651 size
+= SZ_NLATTR_TYPE_OF(uint16_t);
2654 case RTE_FLOW_ITEM_TYPE_VXLAN
:
2655 size
+= SZ_NLATTR_TYPE_OF(uint32_t);
2660 "unsupported item %p type %d,"
2661 " items must be validated"
2662 " before flow creation",
2663 (const void *)items
, items
->type
);
2671 * Calculate maximum size of memory for flow actions of Linux TC flower and
2672 * extract specified actions.
2674 * @param[in] actions
2675 * Pointer to the list of actions.
2676 * @param[out] action_flags
2677 * Pointer to the detected actions.
2680 * Maximum size of memory for actions.
2683 flow_tcf_get_actions_and_size(const struct rte_flow_action actions
[],
2684 uint64_t *action_flags
)
2687 uint64_t flags
= *action_flags
;
2689 size
+= SZ_NLATTR_NEST
; /* TCA_FLOWER_ACT. */
2690 for (; actions
->type
!= RTE_FLOW_ACTION_TYPE_END
; actions
++) {
2691 switch (actions
->type
) {
2692 case RTE_FLOW_ACTION_TYPE_VOID
:
2694 case RTE_FLOW_ACTION_TYPE_PORT_ID
:
2695 size
+= SZ_NLATTR_NEST
+ /* na_act_index. */
2696 SZ_NLATTR_STRZ_OF("mirred") +
2697 SZ_NLATTR_NEST
+ /* TCA_ACT_OPTIONS. */
2698 SZ_NLATTR_TYPE_OF(struct tc_mirred
);
2699 flags
|= MLX5_FLOW_ACTION_PORT_ID
;
2701 case RTE_FLOW_ACTION_TYPE_JUMP
:
2702 size
+= SZ_NLATTR_NEST
+ /* na_act_index. */
2703 SZ_NLATTR_STRZ_OF("gact") +
2704 SZ_NLATTR_NEST
+ /* TCA_ACT_OPTIONS. */
2705 SZ_NLATTR_TYPE_OF(struct tc_gact
);
2706 flags
|= MLX5_FLOW_ACTION_JUMP
;
2708 case RTE_FLOW_ACTION_TYPE_DROP
:
2709 size
+= SZ_NLATTR_NEST
+ /* na_act_index. */
2710 SZ_NLATTR_STRZ_OF("gact") +
2711 SZ_NLATTR_NEST
+ /* TCA_ACT_OPTIONS. */
2712 SZ_NLATTR_TYPE_OF(struct tc_gact
);
2713 flags
|= MLX5_FLOW_ACTION_DROP
;
2715 case RTE_FLOW_ACTION_TYPE_COUNT
:
2717 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN
:
2718 flags
|= MLX5_FLOW_ACTION_OF_POP_VLAN
;
2719 goto action_of_vlan
;
2720 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN
:
2721 flags
|= MLX5_FLOW_ACTION_OF_PUSH_VLAN
;
2722 goto action_of_vlan
;
2723 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID
:
2724 flags
|= MLX5_FLOW_ACTION_OF_SET_VLAN_VID
;
2725 goto action_of_vlan
;
2726 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP
:
2727 flags
|= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP
;
2728 goto action_of_vlan
;
2730 size
+= SZ_NLATTR_NEST
+ /* na_act_index. */
2731 SZ_NLATTR_STRZ_OF("vlan") +
2732 SZ_NLATTR_NEST
+ /* TCA_ACT_OPTIONS. */
2733 SZ_NLATTR_TYPE_OF(struct tc_vlan
) +
2734 SZ_NLATTR_TYPE_OF(uint16_t) +
2735 /* VLAN protocol. */
2736 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
2737 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
2739 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP
:
2740 size
+= SZ_NLATTR_NEST
+ /* na_act_index. */
2741 SZ_NLATTR_STRZ_OF("tunnel_key") +
2742 SZ_NLATTR_NEST
+ /* TCA_ACT_OPTIONS. */
2743 SZ_NLATTR_TYPE_OF(uint8_t);
2744 size
+= SZ_NLATTR_TYPE_OF(struct tc_tunnel_key
);
2745 size
+= flow_tcf_vxlan_encap_size(actions
) +
2746 RTE_ALIGN_CEIL
/* preceding encap params. */
2747 (sizeof(struct flow_tcf_vxlan_encap
),
2749 flags
|= MLX5_FLOW_ACTION_VXLAN_ENCAP
;
2751 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP
:
2752 size
+= SZ_NLATTR_NEST
+ /* na_act_index. */
2753 SZ_NLATTR_STRZ_OF("tunnel_key") +
2754 SZ_NLATTR_NEST
+ /* TCA_ACT_OPTIONS. */
2755 SZ_NLATTR_TYPE_OF(uint8_t);
2756 size
+= SZ_NLATTR_TYPE_OF(struct tc_tunnel_key
);
2757 size
+= RTE_ALIGN_CEIL
/* preceding decap params. */
2758 (sizeof(struct flow_tcf_vxlan_decap
),
2760 flags
|= MLX5_FLOW_ACTION_VXLAN_DECAP
;
2762 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
:
2763 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
:
2764 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
:
2765 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
:
2766 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC
:
2767 case RTE_FLOW_ACTION_TYPE_SET_TP_DST
:
2768 case RTE_FLOW_ACTION_TYPE_SET_TTL
:
2769 case RTE_FLOW_ACTION_TYPE_DEC_TTL
:
2770 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC
:
2771 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST
:
2772 size
+= flow_tcf_get_pedit_actions_size(&actions
,
2777 "unsupported action %p type %d,"
2778 " items must be validated before flow creation",
2779 (const void *)actions
, actions
->type
);
2783 *action_flags
= flags
;
2788 * Prepare a flow object for Linux TC flower. It calculates the maximum size of
2789 * memory required, allocates the memory, initializes Netlink message headers
2790 * and set unique TC message handle.
2793 * Pointer to the flow attributes.
2795 * Pointer to the list of items.
2796 * @param[in] actions
2797 * Pointer to the list of actions.
2799 * Pointer to the error structure.
2802 * Pointer to mlx5_flow object on success,
2803 * otherwise NULL and rte_errno is set.
2805 static struct mlx5_flow
*
2806 flow_tcf_prepare(const struct rte_flow_attr
*attr
,
2807 const struct rte_flow_item items
[],
2808 const struct rte_flow_action actions
[],
2809 struct rte_flow_error
*error
)
2811 size_t size
= RTE_ALIGN_CEIL
2812 (sizeof(struct mlx5_flow
),
2813 alignof(struct flow_tcf_tunnel_hdr
)) +
2814 MNL_ALIGN(sizeof(struct nlmsghdr
)) +
2815 MNL_ALIGN(sizeof(struct tcmsg
));
2816 struct mlx5_flow
*dev_flow
;
2817 uint64_t action_flags
= 0;
2818 struct nlmsghdr
*nlh
;
2820 uint8_t *sp
, *tun
= NULL
;
2822 size
+= flow_tcf_get_items_size(attr
, items
, &action_flags
);
2823 size
+= flow_tcf_get_actions_and_size(actions
, &action_flags
);
2824 dev_flow
= rte_zmalloc(__func__
, size
, MNL_ALIGNTO
);
2826 rte_flow_error_set(error
, ENOMEM
,
2827 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
2828 "not enough memory to create E-Switch flow");
2831 sp
= (uint8_t *)(dev_flow
+ 1);
2832 if (action_flags
& MLX5_FLOW_ACTION_VXLAN_ENCAP
) {
2834 (sp
, alignof(struct flow_tcf_tunnel_hdr
));
2836 sp
+= RTE_ALIGN_CEIL
2837 (sizeof(struct flow_tcf_vxlan_encap
),
2840 size
-= RTE_ALIGN_CEIL
2841 (sizeof(struct flow_tcf_vxlan_encap
),
2844 } else if (action_flags
& MLX5_FLOW_ACTION_VXLAN_DECAP
) {
2846 (sp
, alignof(struct flow_tcf_tunnel_hdr
));
2848 sp
+= RTE_ALIGN_CEIL
2849 (sizeof(struct flow_tcf_vxlan_decap
),
2852 size
-= RTE_ALIGN_CEIL
2853 (sizeof(struct flow_tcf_vxlan_decap
),
2857 sp
= RTE_PTR_ALIGN(sp
, MNL_ALIGNTO
);
2859 nlh
= mnl_nlmsg_put_header(sp
);
2860 tcm
= mnl_nlmsg_put_extra_header(nlh
, sizeof(*tcm
));
2861 *dev_flow
= (struct mlx5_flow
){
2862 .tcf
= (struct mlx5_flow_tcf
){
2864 .nlsize
= size
- RTE_ALIGN_CEIL
2865 (sizeof(struct mlx5_flow
),
2866 alignof(struct flow_tcf_tunnel_hdr
)),
2868 .tunnel
= (struct flow_tcf_tunnel_hdr
*)tun
,
2873 if (action_flags
& MLX5_FLOW_ACTION_VXLAN_DECAP
)
2874 dev_flow
->tcf
.tunnel
->type
= FLOW_TCF_TUNACT_VXLAN_DECAP
;
2875 else if (action_flags
& MLX5_FLOW_ACTION_VXLAN_ENCAP
)
2876 dev_flow
->tcf
.tunnel
->type
= FLOW_TCF_TUNACT_VXLAN_ENCAP
;
2881 * Make adjustments for supporting count actions.
2884 * Pointer to the Ethernet device structure.
2885 * @param[in] dev_flow
2886 * Pointer to mlx5_flow.
2888 * Pointer to error structure.
2891 * 0 On success else a negative errno value is returned and rte_errno is set.
2894 flow_tcf_translate_action_count(struct rte_eth_dev
*dev __rte_unused
,
2895 struct mlx5_flow
*dev_flow
,
2896 struct rte_flow_error
*error
)
2898 struct rte_flow
*flow
= dev_flow
->flow
;
2900 if (!flow
->counter
) {
2901 flow
->counter
= flow_tcf_counter_new();
2903 return rte_flow_error_set(error
, rte_errno
,
2904 RTE_FLOW_ERROR_TYPE_ACTION
,
2906 "cannot get counter"
2913 * Convert VXLAN VNI to 32-bit integer.
2916 * VXLAN VNI in 24-bit wire format.
2919 * VXLAN VNI as a 32-bit integer value in network endianness.
2921 static inline rte_be32_t
2922 vxlan_vni_as_be32(const uint8_t vni
[3])
2928 .vni
= { 0, vni
[0], vni
[1], vni
[2] },
2934 * Helper function to process RTE_FLOW_ITEM_TYPE_ETH entry in configuration
2935 * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the MAC address fields
2936 * in the encapsulation parameters structure. The item must be prevalidated,
2937 * no any validation checks performed by function.
2940 * RTE_FLOW_ITEM_TYPE_ETH entry specification.
2942 * RTE_FLOW_ITEM_TYPE_ETH entry mask.
2944 * Structure to fill the gathered MAC address data.
2947 flow_tcf_parse_vxlan_encap_eth(const struct rte_flow_item_eth
*spec
,
2948 const struct rte_flow_item_eth
*mask
,
2949 struct flow_tcf_vxlan_encap
*encap
)
2951 /* Item must be validated before. No redundant checks. */
2953 if (!mask
|| !memcmp(&mask
->dst
,
2954 &rte_flow_item_eth_mask
.dst
,
2955 sizeof(rte_flow_item_eth_mask
.dst
))) {
2957 * Ethernet addresses are not supported by
2958 * tc as tunnel_key parameters. Destination
2959 * address is needed to form encap packet
2960 * header and retrieved by kernel from
2961 * implicit sources (ARP table, etc),
2962 * address masks are not supported at all.
2964 encap
->eth
.dst
= spec
->dst
;
2965 encap
->mask
|= FLOW_TCF_ENCAP_ETH_DST
;
2967 if (!mask
|| !memcmp(&mask
->src
,
2968 &rte_flow_item_eth_mask
.src
,
2969 sizeof(rte_flow_item_eth_mask
.src
))) {
2971 * Ethernet addresses are not supported by
2972 * tc as tunnel_key parameters. Source ethernet
2973 * address is ignored anyway.
2975 encap
->eth
.src
= spec
->src
;
2976 encap
->mask
|= FLOW_TCF_ENCAP_ETH_SRC
;
2981 * Helper function to process RTE_FLOW_ITEM_TYPE_IPV4 entry in configuration
2982 * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the IPV4 address fields
2983 * in the encapsulation parameters structure. The item must be prevalidated,
2984 * no any validation checks performed by function.
2987 * RTE_FLOW_ITEM_TYPE_IPV4 entry specification.
2989 * RTE_FLOW_ITEM_TYPE_IPV4 entry mask.
2991 * Structure to fill the gathered IPV4 address data.
2994 flow_tcf_parse_vxlan_encap_ipv4(const struct rte_flow_item_ipv4
*spec
,
2995 const struct rte_flow_item_ipv4
*mask
,
2996 struct flow_tcf_vxlan_encap
*encap
)
2998 /* Item must be validated before. No redundant checks. */
3000 encap
->ipv4
.dst
= spec
->hdr
.dst_addr
;
3001 encap
->ipv4
.src
= spec
->hdr
.src_addr
;
3002 encap
->mask
|= FLOW_TCF_ENCAP_IPV4_SRC
|
3003 FLOW_TCF_ENCAP_IPV4_DST
;
3004 if (mask
&& mask
->hdr
.type_of_service
) {
3005 encap
->mask
|= FLOW_TCF_ENCAP_IP_TOS
;
3006 encap
->ip_tos
= spec
->hdr
.type_of_service
;
3008 if (mask
&& mask
->hdr
.time_to_live
) {
3009 encap
->mask
|= FLOW_TCF_ENCAP_IP_TTL
;
3010 encap
->ip_ttl_hop
= spec
->hdr
.time_to_live
;
3015 * Helper function to process RTE_FLOW_ITEM_TYPE_IPV6 entry in configuration
3016 * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the IPV6 address fields
3017 * in the encapsulation parameters structure. The item must be prevalidated,
3018 * no any validation checks performed by function.
3021 * RTE_FLOW_ITEM_TYPE_IPV6 entry specification.
3023 * RTE_FLOW_ITEM_TYPE_IPV6 entry mask.
3025 * Structure to fill the gathered IPV6 address data.
3028 flow_tcf_parse_vxlan_encap_ipv6(const struct rte_flow_item_ipv6
*spec
,
3029 const struct rte_flow_item_ipv6
*mask
,
3030 struct flow_tcf_vxlan_encap
*encap
)
3032 /* Item must be validated before. No redundant checks. */
3034 memcpy(encap
->ipv6
.dst
, spec
->hdr
.dst_addr
, IPV6_ADDR_LEN
);
3035 memcpy(encap
->ipv6
.src
, spec
->hdr
.src_addr
, IPV6_ADDR_LEN
);
3036 encap
->mask
|= FLOW_TCF_ENCAP_IPV6_SRC
|
3037 FLOW_TCF_ENCAP_IPV6_DST
;
3039 if ((rte_be_to_cpu_32(mask
->hdr
.vtc_flow
) >>
3040 IPV6_HDR_TC_SHIFT
) & 0xff) {
3041 encap
->mask
|= FLOW_TCF_ENCAP_IP_TOS
;
3042 encap
->ip_tos
= (rte_be_to_cpu_32
3043 (spec
->hdr
.vtc_flow
) >>
3044 IPV6_HDR_TC_SHIFT
) & 0xff;
3046 if (mask
->hdr
.hop_limits
) {
3047 encap
->mask
|= FLOW_TCF_ENCAP_IP_TTL
;
3048 encap
->ip_ttl_hop
= spec
->hdr
.hop_limits
;
3054 * Helper function to process RTE_FLOW_ITEM_TYPE_UDP entry in configuration
3055 * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the UDP port fields
3056 * in the encapsulation parameters structure. The item must be prevalidated,
3057 * no any validation checks performed by function.
3060 * RTE_FLOW_ITEM_TYPE_UDP entry specification.
3062 * RTE_FLOW_ITEM_TYPE_UDP entry mask.
3064 * Structure to fill the gathered UDP port data.
3067 flow_tcf_parse_vxlan_encap_udp(const struct rte_flow_item_udp
*spec
,
3068 const struct rte_flow_item_udp
*mask
,
3069 struct flow_tcf_vxlan_encap
*encap
)
3072 encap
->udp
.dst
= spec
->hdr
.dst_port
;
3073 encap
->mask
|= FLOW_TCF_ENCAP_UDP_DST
;
3074 if (!mask
|| mask
->hdr
.src_port
!= RTE_BE16(0x0000)) {
3075 encap
->udp
.src
= spec
->hdr
.src_port
;
3076 encap
->mask
|= FLOW_TCF_ENCAP_IPV4_SRC
;
3081 * Helper function to process RTE_FLOW_ITEM_TYPE_VXLAN entry in configuration
3082 * of action RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. Fills the VNI fields
3083 * in the encapsulation parameters structure. The item must be prevalidated,
3084 * no any validation checks performed by function.
3087 * RTE_FLOW_ITEM_TYPE_VXLAN entry specification.
3089 * Structure to fill the gathered VNI address data.
3092 flow_tcf_parse_vxlan_encap_vni(const struct rte_flow_item_vxlan
*spec
,
3093 struct flow_tcf_vxlan_encap
*encap
)
3095 /* Item must be validated before. Do not redundant checks. */
3097 memcpy(encap
->vxlan
.vni
, spec
->vni
, sizeof(encap
->vxlan
.vni
));
3098 encap
->mask
|= FLOW_TCF_ENCAP_VXLAN_VNI
;
3102 * Populate consolidated encapsulation object from list of pattern items.
3104 * Helper function to process configuration of action such as
3105 * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. The item list should be
3106 * validated, there is no way to return an meaningful error.
3109 * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action object.
3110 * List of pattern items to gather data from.
3112 * Structure to fill gathered data.
3115 flow_tcf_vxlan_encap_parse(const struct rte_flow_action
*action
,
3116 struct flow_tcf_vxlan_encap
*encap
)
3119 const struct rte_flow_item_eth
*eth
;
3120 const struct rte_flow_item_ipv4
*ipv4
;
3121 const struct rte_flow_item_ipv6
*ipv6
;
3122 const struct rte_flow_item_udp
*udp
;
3123 const struct rte_flow_item_vxlan
*vxlan
;
3125 const struct rte_flow_item
*items
;
3127 assert(action
->type
== RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP
);
3128 assert(action
->conf
);
3130 items
= ((const struct rte_flow_action_vxlan_encap
*)
3131 action
->conf
)->definition
;
3133 for (; items
->type
!= RTE_FLOW_ITEM_TYPE_END
; items
++) {
3134 switch (items
->type
) {
3135 case RTE_FLOW_ITEM_TYPE_VOID
:
3137 case RTE_FLOW_ITEM_TYPE_ETH
:
3138 mask
.eth
= items
->mask
;
3139 spec
.eth
= items
->spec
;
3140 flow_tcf_parse_vxlan_encap_eth(spec
.eth
, mask
.eth
,
3143 case RTE_FLOW_ITEM_TYPE_IPV4
:
3144 spec
.ipv4
= items
->spec
;
3145 mask
.ipv4
= items
->mask
;
3146 flow_tcf_parse_vxlan_encap_ipv4(spec
.ipv4
, mask
.ipv4
,
3149 case RTE_FLOW_ITEM_TYPE_IPV6
:
3150 spec
.ipv6
= items
->spec
;
3151 mask
.ipv6
= items
->mask
;
3152 flow_tcf_parse_vxlan_encap_ipv6(spec
.ipv6
, mask
.ipv6
,
3155 case RTE_FLOW_ITEM_TYPE_UDP
:
3156 mask
.udp
= items
->mask
;
3157 spec
.udp
= items
->spec
;
3158 flow_tcf_parse_vxlan_encap_udp(spec
.udp
, mask
.udp
,
3161 case RTE_FLOW_ITEM_TYPE_VXLAN
:
3162 spec
.vxlan
= items
->spec
;
3163 flow_tcf_parse_vxlan_encap_vni(spec
.vxlan
, encap
);
3168 "unsupported item %p type %d,"
3169 " items must be validated"
3170 " before flow creation",
3171 (const void *)items
, items
->type
);
3179 * Translate flow for Linux TC flower and construct Netlink message.
3182 * Pointer to the priv structure.
3183 * @param[in, out] flow
3184 * Pointer to the sub flow.
3186 * Pointer to the flow attributes.
3188 * Pointer to the list of items.
3189 * @param[in] actions
3190 * Pointer to the list of actions.
3192 * Pointer to the error structure.
3195 * 0 on success, a negative errno value otherwise and rte_errno is set.
3198 flow_tcf_translate(struct rte_eth_dev
*dev
, struct mlx5_flow
*dev_flow
,
3199 const struct rte_flow_attr
*attr
,
3200 const struct rte_flow_item items
[],
3201 const struct rte_flow_action actions
[],
3202 struct rte_flow_error
*error
)
3205 const struct rte_flow_item_port_id
*port_id
;
3206 const struct rte_flow_item_eth
*eth
;
3207 const struct rte_flow_item_vlan
*vlan
;
3208 const struct rte_flow_item_ipv4
*ipv4
;
3209 const struct rte_flow_item_ipv6
*ipv6
;
3210 const struct rte_flow_item_tcp
*tcp
;
3211 const struct rte_flow_item_udp
*udp
;
3212 const struct rte_flow_item_vxlan
*vxlan
;
3215 const struct rte_flow_action_port_id
*port_id
;
3216 const struct rte_flow_action_jump
*jump
;
3217 const struct rte_flow_action_of_push_vlan
*of_push_vlan
;
3218 const struct rte_flow_action_of_set_vlan_vid
*
3220 const struct rte_flow_action_of_set_vlan_pcp
*
3224 struct flow_tcf_tunnel_hdr
*hdr
;
3225 struct flow_tcf_vxlan_decap
*vxlan
;
3230 struct flow_tcf_tunnel_hdr
*hdr
;
3231 struct flow_tcf_vxlan_encap
*vxlan
;
3235 struct flow_tcf_ptoi ptoi
[PTOI_TABLE_SZ_MAX(dev
)];
3236 struct nlmsghdr
*nlh
= dev_flow
->tcf
.nlh
;
3237 struct tcmsg
*tcm
= dev_flow
->tcf
.tcm
;
3238 uint32_t na_act_index_cur
;
3239 rte_be16_t inner_etype
= RTE_BE16(ETH_P_ALL
);
3240 rte_be16_t outer_etype
= RTE_BE16(ETH_P_ALL
);
3241 rte_be16_t vlan_etype
= RTE_BE16(ETH_P_ALL
);
3242 bool ip_proto_set
= 0;
3243 bool tunnel_outer
= 0;
3244 struct nlattr
*na_flower
;
3245 struct nlattr
*na_flower_act
;
3246 struct nlattr
*na_vlan_id
= NULL
;
3247 struct nlattr
*na_vlan_priority
= NULL
;
3248 uint64_t item_flags
= 0;
3251 claim_nonzero(flow_tcf_build_ptoi_table(dev
, ptoi
,
3252 PTOI_TABLE_SZ_MAX(dev
)));
3253 if (dev_flow
->tcf
.tunnel
) {
3254 switch (dev_flow
->tcf
.tunnel
->type
) {
3255 case FLOW_TCF_TUNACT_VXLAN_DECAP
:
3256 decap
.vxlan
= dev_flow
->tcf
.vxlan_decap
;
3259 case FLOW_TCF_TUNACT_VXLAN_ENCAP
:
3260 encap
.vxlan
= dev_flow
->tcf
.vxlan_encap
;
3262 /* New tunnel actions can be added here. */
3268 nlh
= dev_flow
->tcf
.nlh
;
3269 tcm
= dev_flow
->tcf
.tcm
;
3270 /* Prepare API must have been called beforehand. */
3271 assert(nlh
!= NULL
&& tcm
!= NULL
);
3272 tcm
->tcm_family
= AF_UNSPEC
;
3273 tcm
->tcm_ifindex
= ptoi
[0].ifindex
;
3274 tcm
->tcm_parent
= TC_H_MAKE(TC_H_INGRESS
, TC_H_MIN_INGRESS
);
3276 * Priority cannot be zero to prevent the kernel from picking one
3279 tcm
->tcm_info
= TC_H_MAKE((attr
->priority
+ 1) << 16, outer_etype
);
3280 if (attr
->group
> 0)
3281 mnl_attr_put_u32(nlh
, TCA_CHAIN
, attr
->group
);
3282 mnl_attr_put_strz(nlh
, TCA_KIND
, "flower");
3283 na_flower
= mnl_attr_nest_start(nlh
, TCA_OPTIONS
);
3284 for (; items
->type
!= RTE_FLOW_ITEM_TYPE_END
; items
++) {
3287 switch (items
->type
) {
3288 case RTE_FLOW_ITEM_TYPE_VOID
:
3290 case RTE_FLOW_ITEM_TYPE_PORT_ID
:
3291 mask
.port_id
= flow_tcf_item_mask
3292 (items
, &rte_flow_item_port_id_mask
,
3293 &flow_tcf_mask_supported
.port_id
,
3294 &flow_tcf_mask_empty
.port_id
,
3295 sizeof(flow_tcf_mask_supported
.port_id
),
3297 assert(mask
.port_id
);
3298 if (mask
.port_id
== &flow_tcf_mask_empty
.port_id
)
3300 spec
.port_id
= items
->spec
;
3301 if (!mask
.port_id
->id
)
3304 for (i
= 0; ptoi
[i
].ifindex
; ++i
)
3305 if (ptoi
[i
].port_id
== spec
.port_id
->id
)
3307 assert(ptoi
[i
].ifindex
);
3308 tcm
->tcm_ifindex
= ptoi
[i
].ifindex
;
3310 case RTE_FLOW_ITEM_TYPE_ETH
:
3311 item_flags
|= (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) ?
3312 MLX5_FLOW_LAYER_INNER_L2
:
3313 MLX5_FLOW_LAYER_OUTER_L2
;
3314 mask
.eth
= flow_tcf_item_mask
3315 (items
, &rte_flow_item_eth_mask
,
3316 &flow_tcf_mask_supported
.eth
,
3317 &flow_tcf_mask_empty
.eth
,
3318 sizeof(flow_tcf_mask_supported
.eth
),
3321 if (mask
.eth
== &flow_tcf_mask_empty
.eth
)
3323 spec
.eth
= items
->spec
;
3324 if (mask
.eth
->type
) {
3325 if (item_flags
& MLX5_FLOW_LAYER_TUNNEL
)
3326 inner_etype
= spec
.eth
->type
;
3328 outer_etype
= spec
.eth
->type
;
3332 "outer L2 addresses cannot be"
3333 " forced is outer ones for tunnel,"
3334 " parameter is ignored");
3337 if (!is_zero_ether_addr(&mask
.eth
->dst
)) {
3338 mnl_attr_put(nlh
, TCA_FLOWER_KEY_ETH_DST
,
3340 spec
.eth
->dst
.addr_bytes
);
3341 mnl_attr_put(nlh
, TCA_FLOWER_KEY_ETH_DST_MASK
,
3343 mask
.eth
->dst
.addr_bytes
);
3345 if (!is_zero_ether_addr(&mask
.eth
->src
)) {
3346 mnl_attr_put(nlh
, TCA_FLOWER_KEY_ETH_SRC
,
3348 spec
.eth
->src
.addr_bytes
);
3349 mnl_attr_put(nlh
, TCA_FLOWER_KEY_ETH_SRC_MASK
,
3351 mask
.eth
->src
.addr_bytes
);
3353 assert(dev_flow
->tcf
.nlsize
>= nlh
->nlmsg_len
);
3355 case RTE_FLOW_ITEM_TYPE_VLAN
:
3358 assert(!tunnel_outer
);
3359 item_flags
|= MLX5_FLOW_LAYER_OUTER_VLAN
;
3360 mask
.vlan
= flow_tcf_item_mask
3361 (items
, &rte_flow_item_vlan_mask
,
3362 &flow_tcf_mask_supported
.vlan
,
3363 &flow_tcf_mask_empty
.vlan
,
3364 sizeof(flow_tcf_mask_supported
.vlan
),
3367 if (mask
.vlan
== &flow_tcf_mask_empty
.vlan
)
3369 spec
.vlan
= items
->spec
;
3370 assert(outer_etype
== RTE_BE16(ETH_P_ALL
) ||
3371 outer_etype
== RTE_BE16(ETH_P_8021Q
));
3372 outer_etype
= RTE_BE16(ETH_P_8021Q
);
3373 if (mask
.vlan
->inner_type
)
3374 vlan_etype
= spec
.vlan
->inner_type
;
3375 if (mask
.vlan
->tci
& RTE_BE16(0xe000))
3376 mnl_attr_put_u8(nlh
, TCA_FLOWER_KEY_VLAN_PRIO
,
3378 (spec
.vlan
->tci
) >> 13) & 0x7);
3379 if (mask
.vlan
->tci
& RTE_BE16(0x0fff))
3380 mnl_attr_put_u16(nlh
, TCA_FLOWER_KEY_VLAN_ID
,
3384 assert(dev_flow
->tcf
.nlsize
>= nlh
->nlmsg_len
);
3386 case RTE_FLOW_ITEM_TYPE_IPV4
:
3387 item_flags
|= (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) ?
3388 MLX5_FLOW_LAYER_INNER_L3_IPV4
:
3389 MLX5_FLOW_LAYER_OUTER_L3_IPV4
;
3390 mask
.ipv4
= flow_tcf_item_mask
3391 (items
, &rte_flow_item_ipv4_mask
,
3392 &flow_tcf_mask_supported
.ipv4
,
3393 &flow_tcf_mask_empty
.ipv4
,
3394 sizeof(flow_tcf_mask_supported
.ipv4
),
3397 if (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) {
3398 assert(inner_etype
== RTE_BE16(ETH_P_ALL
) ||
3399 inner_etype
== RTE_BE16(ETH_P_IP
));
3400 inner_etype
= RTE_BE16(ETH_P_IP
);
3401 } else if (outer_etype
== RTE_BE16(ETH_P_8021Q
)) {
3402 assert(vlan_etype
== RTE_BE16(ETH_P_ALL
) ||
3403 vlan_etype
== RTE_BE16(ETH_P_IP
));
3404 vlan_etype
= RTE_BE16(ETH_P_IP
);
3406 assert(outer_etype
== RTE_BE16(ETH_P_ALL
) ||
3407 outer_etype
== RTE_BE16(ETH_P_IP
));
3408 outer_etype
= RTE_BE16(ETH_P_IP
);
3410 spec
.ipv4
= items
->spec
;
3411 if (!tunnel_outer
&& mask
.ipv4
->hdr
.next_proto_id
) {
3413 * No way to set IP protocol for outer tunnel
3414 * layers. Usually it is fixed, for example,
3415 * to UDP for VXLAN/GPE.
3417 assert(spec
.ipv4
); /* Mask is not empty. */
3418 mnl_attr_put_u8(nlh
, TCA_FLOWER_KEY_IP_PROTO
,
3419 spec
.ipv4
->hdr
.next_proto_id
);
3422 if (mask
.ipv4
== &flow_tcf_mask_empty
.ipv4
||
3423 (!mask
.ipv4
->hdr
.src_addr
&&
3424 !mask
.ipv4
->hdr
.dst_addr
)) {
3428 * For tunnel outer we must set outer IP key
3429 * anyway, even if the specification/mask is
3430 * empty. There is no another way to tell
3431 * kernel about he outer layer protocol.
3434 (nlh
, TCA_FLOWER_KEY_ENC_IPV4_SRC
,
3435 mask
.ipv4
->hdr
.src_addr
);
3437 (nlh
, TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK
,
3438 mask
.ipv4
->hdr
.src_addr
);
3439 assert(dev_flow
->tcf
.nlsize
>= nlh
->nlmsg_len
);
3442 if (mask
.ipv4
->hdr
.src_addr
) {
3444 (nlh
, tunnel_outer
?
3445 TCA_FLOWER_KEY_ENC_IPV4_SRC
:
3446 TCA_FLOWER_KEY_IPV4_SRC
,
3447 spec
.ipv4
->hdr
.src_addr
);
3449 (nlh
, tunnel_outer
?
3450 TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK
:
3451 TCA_FLOWER_KEY_IPV4_SRC_MASK
,
3452 mask
.ipv4
->hdr
.src_addr
);
3454 if (mask
.ipv4
->hdr
.dst_addr
) {
3456 (nlh
, tunnel_outer
?
3457 TCA_FLOWER_KEY_ENC_IPV4_DST
:
3458 TCA_FLOWER_KEY_IPV4_DST
,
3459 spec
.ipv4
->hdr
.dst_addr
);
3461 (nlh
, tunnel_outer
?
3462 TCA_FLOWER_KEY_ENC_IPV4_DST_MASK
:
3463 TCA_FLOWER_KEY_IPV4_DST_MASK
,
3464 mask
.ipv4
->hdr
.dst_addr
);
3466 if (mask
.ipv4
->hdr
.time_to_live
) {
3468 (nlh
, tunnel_outer
?
3469 TCA_FLOWER_KEY_ENC_IP_TTL
:
3470 TCA_FLOWER_KEY_IP_TTL
,
3471 spec
.ipv4
->hdr
.time_to_live
);
3473 (nlh
, tunnel_outer
?
3474 TCA_FLOWER_KEY_ENC_IP_TTL_MASK
:
3475 TCA_FLOWER_KEY_IP_TTL_MASK
,
3476 mask
.ipv4
->hdr
.time_to_live
);
3478 if (mask
.ipv4
->hdr
.type_of_service
) {
3480 (nlh
, tunnel_outer
?
3481 TCA_FLOWER_KEY_ENC_IP_TOS
:
3482 TCA_FLOWER_KEY_IP_TOS
,
3483 spec
.ipv4
->hdr
.type_of_service
);
3485 (nlh
, tunnel_outer
?
3486 TCA_FLOWER_KEY_ENC_IP_TOS_MASK
:
3487 TCA_FLOWER_KEY_IP_TOS_MASK
,
3488 mask
.ipv4
->hdr
.type_of_service
);
3490 assert(dev_flow
->tcf
.nlsize
>= nlh
->nlmsg_len
);
3492 case RTE_FLOW_ITEM_TYPE_IPV6
: {
3493 bool ipv6_src
, ipv6_dst
;
3496 item_flags
|= (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) ?
3497 MLX5_FLOW_LAYER_INNER_L3_IPV6
:
3498 MLX5_FLOW_LAYER_OUTER_L3_IPV6
;
3499 mask
.ipv6
= flow_tcf_item_mask
3500 (items
, &rte_flow_item_ipv6_mask
,
3501 &flow_tcf_mask_supported
.ipv6
,
3502 &flow_tcf_mask_empty
.ipv6
,
3503 sizeof(flow_tcf_mask_supported
.ipv6
),
3506 if (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) {
3507 assert(inner_etype
== RTE_BE16(ETH_P_ALL
) ||
3508 inner_etype
== RTE_BE16(ETH_P_IPV6
));
3509 inner_etype
= RTE_BE16(ETH_P_IPV6
);
3510 } else if (outer_etype
== RTE_BE16(ETH_P_8021Q
)) {
3511 assert(vlan_etype
== RTE_BE16(ETH_P_ALL
) ||
3512 vlan_etype
== RTE_BE16(ETH_P_IPV6
));
3513 vlan_etype
= RTE_BE16(ETH_P_IPV6
);
3515 assert(outer_etype
== RTE_BE16(ETH_P_ALL
) ||
3516 outer_etype
== RTE_BE16(ETH_P_IPV6
));
3517 outer_etype
= RTE_BE16(ETH_P_IPV6
);
3519 spec
.ipv6
= items
->spec
;
3520 if (!tunnel_outer
&& mask
.ipv6
->hdr
.proto
) {
3522 * No way to set IP protocol for outer tunnel
3523 * layers. Usually it is fixed, for example,
3524 * to UDP for VXLAN/GPE.
3526 assert(spec
.ipv6
); /* Mask is not empty. */
3527 mnl_attr_put_u8(nlh
, TCA_FLOWER_KEY_IP_PROTO
,
3528 spec
.ipv6
->hdr
.proto
);
3531 ipv6_dst
= !IN6_IS_ADDR_UNSPECIFIED
3532 (mask
.ipv6
->hdr
.dst_addr
);
3533 ipv6_src
= !IN6_IS_ADDR_UNSPECIFIED
3534 (mask
.ipv6
->hdr
.src_addr
);
3535 if (mask
.ipv6
== &flow_tcf_mask_empty
.ipv6
||
3536 (!ipv6_dst
&& !ipv6_src
)) {
3540 * For tunnel outer we must set outer IP key
3541 * anyway, even if the specification/mask is
3542 * empty. There is no another way to tell
3543 * kernel about he outer layer protocol.
3546 TCA_FLOWER_KEY_ENC_IPV6_SRC
,
3548 mask
.ipv6
->hdr
.src_addr
);
3550 TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK
,
3552 mask
.ipv6
->hdr
.src_addr
);
3553 assert(dev_flow
->tcf
.nlsize
>= nlh
->nlmsg_len
);
3557 mnl_attr_put(nlh
, tunnel_outer
?
3558 TCA_FLOWER_KEY_ENC_IPV6_SRC
:
3559 TCA_FLOWER_KEY_IPV6_SRC
,
3561 spec
.ipv6
->hdr
.src_addr
);
3562 mnl_attr_put(nlh
, tunnel_outer
?
3563 TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK
:
3564 TCA_FLOWER_KEY_IPV6_SRC_MASK
,
3566 mask
.ipv6
->hdr
.src_addr
);
3569 mnl_attr_put(nlh
, tunnel_outer
?
3570 TCA_FLOWER_KEY_ENC_IPV6_DST
:
3571 TCA_FLOWER_KEY_IPV6_DST
,
3573 spec
.ipv6
->hdr
.dst_addr
);
3574 mnl_attr_put(nlh
, tunnel_outer
?
3575 TCA_FLOWER_KEY_ENC_IPV6_DST_MASK
:
3576 TCA_FLOWER_KEY_IPV6_DST_MASK
,
3578 mask
.ipv6
->hdr
.dst_addr
);
3580 if (mask
.ipv6
->hdr
.hop_limits
) {
3582 (nlh
, tunnel_outer
?
3583 TCA_FLOWER_KEY_ENC_IP_TTL
:
3584 TCA_FLOWER_KEY_IP_TTL
,
3585 spec
.ipv6
->hdr
.hop_limits
);
3587 (nlh
, tunnel_outer
?
3588 TCA_FLOWER_KEY_ENC_IP_TTL_MASK
:
3589 TCA_FLOWER_KEY_IP_TTL_MASK
,
3590 mask
.ipv6
->hdr
.hop_limits
);
3592 msk6
= (rte_be_to_cpu_32(mask
.ipv6
->hdr
.vtc_flow
) >>
3593 IPV6_HDR_TC_SHIFT
) & 0xff;
3595 tos6
= (rte_be_to_cpu_32
3596 (spec
.ipv6
->hdr
.vtc_flow
) >>
3597 IPV6_HDR_TC_SHIFT
) & 0xff;
3599 (nlh
, tunnel_outer
?
3600 TCA_FLOWER_KEY_ENC_IP_TOS
:
3601 TCA_FLOWER_KEY_IP_TOS
, tos6
);
3603 (nlh
, tunnel_outer
?
3604 TCA_FLOWER_KEY_ENC_IP_TOS_MASK
:
3605 TCA_FLOWER_KEY_IP_TOS_MASK
, msk6
);
3607 assert(dev_flow
->tcf
.nlsize
>= nlh
->nlmsg_len
);
3610 case RTE_FLOW_ITEM_TYPE_UDP
:
3611 item_flags
|= (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) ?
3612 MLX5_FLOW_LAYER_INNER_L4_UDP
:
3613 MLX5_FLOW_LAYER_OUTER_L4_UDP
;
3614 mask
.udp
= flow_tcf_item_mask
3615 (items
, &rte_flow_item_udp_mask
,
3616 &flow_tcf_mask_supported
.udp
,
3617 &flow_tcf_mask_empty
.udp
,
3618 sizeof(flow_tcf_mask_supported
.udp
),
3621 spec
.udp
= items
->spec
;
3622 if (!tunnel_outer
) {
3625 (nlh
, TCA_FLOWER_KEY_IP_PROTO
,
3627 if (mask
.udp
== &flow_tcf_mask_empty
.udp
)
3630 assert(mask
.udp
!= &flow_tcf_mask_empty
.udp
);
3631 decap
.vxlan
->udp_port
=
3633 (spec
.udp
->hdr
.dst_port
);
3635 if (mask
.udp
->hdr
.src_port
) {
3637 (nlh
, tunnel_outer
?
3638 TCA_FLOWER_KEY_ENC_UDP_SRC_PORT
:
3639 TCA_FLOWER_KEY_UDP_SRC
,
3640 spec
.udp
->hdr
.src_port
);
3642 (nlh
, tunnel_outer
?
3643 TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK
:
3644 TCA_FLOWER_KEY_UDP_SRC_MASK
,
3645 mask
.udp
->hdr
.src_port
);
3647 if (mask
.udp
->hdr
.dst_port
) {
3649 (nlh
, tunnel_outer
?
3650 TCA_FLOWER_KEY_ENC_UDP_DST_PORT
:
3651 TCA_FLOWER_KEY_UDP_DST
,
3652 spec
.udp
->hdr
.dst_port
);
3654 (nlh
, tunnel_outer
?
3655 TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK
:
3656 TCA_FLOWER_KEY_UDP_DST_MASK
,
3657 mask
.udp
->hdr
.dst_port
);
3659 assert(dev_flow
->tcf
.nlsize
>= nlh
->nlmsg_len
);
3661 case RTE_FLOW_ITEM_TYPE_TCP
:
3662 item_flags
|= (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) ?
3663 MLX5_FLOW_LAYER_INNER_L4_TCP
:
3664 MLX5_FLOW_LAYER_OUTER_L4_TCP
;
3665 mask
.tcp
= flow_tcf_item_mask
3666 (items
, &rte_flow_item_tcp_mask
,
3667 &flow_tcf_mask_supported
.tcp
,
3668 &flow_tcf_mask_empty
.tcp
,
3669 sizeof(flow_tcf_mask_supported
.tcp
),
3673 mnl_attr_put_u8(nlh
, TCA_FLOWER_KEY_IP_PROTO
,
3675 if (mask
.tcp
== &flow_tcf_mask_empty
.tcp
)
3677 spec
.tcp
= items
->spec
;
3678 if (mask
.tcp
->hdr
.src_port
) {
3679 mnl_attr_put_u16(nlh
, TCA_FLOWER_KEY_TCP_SRC
,
3680 spec
.tcp
->hdr
.src_port
);
3681 mnl_attr_put_u16(nlh
,
3682 TCA_FLOWER_KEY_TCP_SRC_MASK
,
3683 mask
.tcp
->hdr
.src_port
);
3685 if (mask
.tcp
->hdr
.dst_port
) {
3686 mnl_attr_put_u16(nlh
, TCA_FLOWER_KEY_TCP_DST
,
3687 spec
.tcp
->hdr
.dst_port
);
3688 mnl_attr_put_u16(nlh
,
3689 TCA_FLOWER_KEY_TCP_DST_MASK
,
3690 mask
.tcp
->hdr
.dst_port
);
3692 if (mask
.tcp
->hdr
.tcp_flags
) {
3695 TCA_FLOWER_KEY_TCP_FLAGS
,
3697 (spec
.tcp
->hdr
.tcp_flags
));
3700 TCA_FLOWER_KEY_TCP_FLAGS_MASK
,
3702 (mask
.tcp
->hdr
.tcp_flags
));
3704 assert(dev_flow
->tcf
.nlsize
>= nlh
->nlmsg_len
);
3706 case RTE_FLOW_ITEM_TYPE_VXLAN
:
3707 assert(decap
.vxlan
);
3709 item_flags
|= MLX5_FLOW_LAYER_VXLAN
;
3710 spec
.vxlan
= items
->spec
;
3711 mnl_attr_put_u32(nlh
,
3712 TCA_FLOWER_KEY_ENC_KEY_ID
,
3713 vxlan_vni_as_be32(spec
.vxlan
->vni
));
3714 assert(dev_flow
->tcf
.nlsize
>= nlh
->nlmsg_len
);
3717 return rte_flow_error_set(error
, ENOTSUP
,
3718 RTE_FLOW_ERROR_TYPE_ITEM
,
3719 NULL
, "item not supported");
3723 * Set the ether_type flower key and tc rule protocol:
3724 * - if there is nor VLAN neither VXLAN the key is taken from
3725 * eth item directly or deduced from L3 items.
3726 * - if there is vlan item then key is fixed to 802.1q.
3727 * - if there is vxlan item then key is set to inner tunnel type.
3728 * - simultaneous vlan and vxlan items are prohibited.
3730 if (outer_etype
!= RTE_BE16(ETH_P_ALL
)) {
3731 tcm
->tcm_info
= TC_H_MAKE((attr
->priority
+ 1) << 16,
3733 if (item_flags
& MLX5_FLOW_LAYER_TUNNEL
) {
3734 if (inner_etype
!= RTE_BE16(ETH_P_ALL
))
3735 mnl_attr_put_u16(nlh
,
3736 TCA_FLOWER_KEY_ETH_TYPE
,
3739 mnl_attr_put_u16(nlh
,
3740 TCA_FLOWER_KEY_ETH_TYPE
,
3742 if (outer_etype
== RTE_BE16(ETH_P_8021Q
) &&
3743 vlan_etype
!= RTE_BE16(ETH_P_ALL
))
3744 mnl_attr_put_u16(nlh
,
3745 TCA_FLOWER_KEY_VLAN_ETH_TYPE
,
3748 assert(dev_flow
->tcf
.nlsize
>= nlh
->nlmsg_len
);
3750 na_flower_act
= mnl_attr_nest_start(nlh
, TCA_FLOWER_ACT
);
3751 na_act_index_cur
= 1;
3752 for (; actions
->type
!= RTE_FLOW_ACTION_TYPE_END
; actions
++) {
3753 struct nlattr
*na_act_index
;
3754 struct nlattr
*na_act
;
3755 unsigned int vlan_act
;
3758 switch (actions
->type
) {
3759 case RTE_FLOW_ACTION_TYPE_VOID
:
3761 case RTE_FLOW_ACTION_TYPE_PORT_ID
:
3762 conf
.port_id
= actions
->conf
;
3763 if (conf
.port_id
->original
)
3766 for (i
= 0; ptoi
[i
].ifindex
; ++i
)
3767 if (ptoi
[i
].port_id
== conf
.port_id
->id
)
3769 assert(ptoi
[i
].ifindex
);
3771 mnl_attr_nest_start(nlh
, na_act_index_cur
++);
3772 assert(na_act_index
);
3773 mnl_attr_put_strz(nlh
, TCA_ACT_KIND
, "mirred");
3774 na_act
= mnl_attr_nest_start(nlh
, TCA_ACT_OPTIONS
);
3777 assert(dev_flow
->tcf
.tunnel
);
3778 dev_flow
->tcf
.tunnel
->ifindex_ptr
=
3779 &((struct tc_mirred
*)
3780 mnl_attr_get_payload
3781 (mnl_nlmsg_get_payload_tail
3783 } else if (decap
.hdr
) {
3784 assert(dev_flow
->tcf
.tunnel
);
3785 dev_flow
->tcf
.tunnel
->ifindex_ptr
=
3786 (unsigned int *)&tcm
->tcm_ifindex
;
3788 mnl_attr_put(nlh
, TCA_MIRRED_PARMS
,
3789 sizeof(struct tc_mirred
),
3790 &(struct tc_mirred
){
3791 .action
= TC_ACT_STOLEN
,
3792 .eaction
= TCA_EGRESS_REDIR
,
3793 .ifindex
= ptoi
[i
].ifindex
,
3795 mnl_attr_nest_end(nlh
, na_act
);
3796 mnl_attr_nest_end(nlh
, na_act_index
);
3798 case RTE_FLOW_ACTION_TYPE_JUMP
:
3799 conf
.jump
= actions
->conf
;
3801 mnl_attr_nest_start(nlh
, na_act_index_cur
++);
3802 assert(na_act_index
);
3803 mnl_attr_put_strz(nlh
, TCA_ACT_KIND
, "gact");
3804 na_act
= mnl_attr_nest_start(nlh
, TCA_ACT_OPTIONS
);
3806 mnl_attr_put(nlh
, TCA_GACT_PARMS
,
3807 sizeof(struct tc_gact
),
3809 .action
= TC_ACT_GOTO_CHAIN
|
3812 mnl_attr_nest_end(nlh
, na_act
);
3813 mnl_attr_nest_end(nlh
, na_act_index
);
3815 case RTE_FLOW_ACTION_TYPE_DROP
:
3817 mnl_attr_nest_start(nlh
, na_act_index_cur
++);
3818 assert(na_act_index
);
3819 mnl_attr_put_strz(nlh
, TCA_ACT_KIND
, "gact");
3820 na_act
= mnl_attr_nest_start(nlh
, TCA_ACT_OPTIONS
);
3822 mnl_attr_put(nlh
, TCA_GACT_PARMS
,
3823 sizeof(struct tc_gact
),
3825 .action
= TC_ACT_SHOT
,
3827 mnl_attr_nest_end(nlh
, na_act
);
3828 mnl_attr_nest_end(nlh
, na_act_index
);
3830 case RTE_FLOW_ACTION_TYPE_COUNT
:
3832 * Driver adds the count action implicitly for
3833 * each rule it creates.
3835 ret
= flow_tcf_translate_action_count(dev
,
3840 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN
:
3841 conf
.of_push_vlan
= NULL
;
3842 vlan_act
= TCA_VLAN_ACT_POP
;
3843 goto action_of_vlan
;
3844 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN
:
3845 conf
.of_push_vlan
= actions
->conf
;
3846 vlan_act
= TCA_VLAN_ACT_PUSH
;
3847 goto action_of_vlan
;
3848 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID
:
3849 conf
.of_set_vlan_vid
= actions
->conf
;
3851 goto override_na_vlan_id
;
3852 vlan_act
= TCA_VLAN_ACT_MODIFY
;
3853 goto action_of_vlan
;
3854 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP
:
3855 conf
.of_set_vlan_pcp
= actions
->conf
;
3856 if (na_vlan_priority
)
3857 goto override_na_vlan_priority
;
3858 vlan_act
= TCA_VLAN_ACT_MODIFY
;
3859 goto action_of_vlan
;
3862 mnl_attr_nest_start(nlh
, na_act_index_cur
++);
3863 assert(na_act_index
);
3864 mnl_attr_put_strz(nlh
, TCA_ACT_KIND
, "vlan");
3865 na_act
= mnl_attr_nest_start(nlh
, TCA_ACT_OPTIONS
);
3867 mnl_attr_put(nlh
, TCA_VLAN_PARMS
,
3868 sizeof(struct tc_vlan
),
3870 .action
= TC_ACT_PIPE
,
3871 .v_action
= vlan_act
,
3873 if (vlan_act
== TCA_VLAN_ACT_POP
) {
3874 mnl_attr_nest_end(nlh
, na_act
);
3875 mnl_attr_nest_end(nlh
, na_act_index
);
3878 if (vlan_act
== TCA_VLAN_ACT_PUSH
)
3879 mnl_attr_put_u16(nlh
,
3880 TCA_VLAN_PUSH_VLAN_PROTOCOL
,
3881 conf
.of_push_vlan
->ethertype
);
3882 na_vlan_id
= mnl_nlmsg_get_payload_tail(nlh
);
3883 mnl_attr_put_u16(nlh
, TCA_VLAN_PAD
, 0);
3884 na_vlan_priority
= mnl_nlmsg_get_payload_tail(nlh
);
3885 mnl_attr_put_u8(nlh
, TCA_VLAN_PAD
, 0);
3886 mnl_attr_nest_end(nlh
, na_act
);
3887 mnl_attr_nest_end(nlh
, na_act_index
);
3888 if (actions
->type
==
3889 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID
) {
3890 override_na_vlan_id
:
3891 na_vlan_id
->nla_type
= TCA_VLAN_PUSH_VLAN_ID
;
3892 *(uint16_t *)mnl_attr_get_payload(na_vlan_id
) =
3894 (conf
.of_set_vlan_vid
->vlan_vid
);
3895 } else if (actions
->type
==
3896 RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP
) {
3897 override_na_vlan_priority
:
3898 na_vlan_priority
->nla_type
=
3899 TCA_VLAN_PUSH_VLAN_PRIORITY
;
3900 *(uint8_t *)mnl_attr_get_payload
3901 (na_vlan_priority
) =
3902 conf
.of_set_vlan_pcp
->vlan_pcp
;
3905 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP
:
3906 assert(decap
.vxlan
);
3907 assert(dev_flow
->tcf
.tunnel
);
3908 dev_flow
->tcf
.tunnel
->ifindex_ptr
=
3909 (unsigned int *)&tcm
->tcm_ifindex
;
3911 mnl_attr_nest_start(nlh
, na_act_index_cur
++);
3912 assert(na_act_index
);
3913 mnl_attr_put_strz(nlh
, TCA_ACT_KIND
, "tunnel_key");
3914 na_act
= mnl_attr_nest_start(nlh
, TCA_ACT_OPTIONS
);
3916 mnl_attr_put(nlh
, TCA_TUNNEL_KEY_PARMS
,
3917 sizeof(struct tc_tunnel_key
),
3918 &(struct tc_tunnel_key
){
3919 .action
= TC_ACT_PIPE
,
3920 .t_action
= TCA_TUNNEL_KEY_ACT_RELEASE
,
3922 mnl_attr_nest_end(nlh
, na_act
);
3923 mnl_attr_nest_end(nlh
, na_act_index
);
3924 assert(dev_flow
->tcf
.nlsize
>= nlh
->nlmsg_len
);
3926 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP
:
3927 assert(encap
.vxlan
);
3928 flow_tcf_vxlan_encap_parse(actions
, encap
.vxlan
);
3930 mnl_attr_nest_start(nlh
, na_act_index_cur
++);
3931 assert(na_act_index
);
3932 mnl_attr_put_strz(nlh
, TCA_ACT_KIND
, "tunnel_key");
3933 na_act
= mnl_attr_nest_start(nlh
, TCA_ACT_OPTIONS
);
3935 mnl_attr_put(nlh
, TCA_TUNNEL_KEY_PARMS
,
3936 sizeof(struct tc_tunnel_key
),
3937 &(struct tc_tunnel_key
){
3938 .action
= TC_ACT_PIPE
,
3939 .t_action
= TCA_TUNNEL_KEY_ACT_SET
,
3941 if (encap
.vxlan
->mask
& FLOW_TCF_ENCAP_UDP_DST
)
3942 mnl_attr_put_u16(nlh
,
3943 TCA_TUNNEL_KEY_ENC_DST_PORT
,
3944 encap
.vxlan
->udp
.dst
);
3945 if (encap
.vxlan
->mask
& FLOW_TCF_ENCAP_IPV4_SRC
)
3946 mnl_attr_put_u32(nlh
,
3947 TCA_TUNNEL_KEY_ENC_IPV4_SRC
,
3948 encap
.vxlan
->ipv4
.src
);
3949 if (encap
.vxlan
->mask
& FLOW_TCF_ENCAP_IPV4_DST
)
3950 mnl_attr_put_u32(nlh
,
3951 TCA_TUNNEL_KEY_ENC_IPV4_DST
,
3952 encap
.vxlan
->ipv4
.dst
);
3953 if (encap
.vxlan
->mask
& FLOW_TCF_ENCAP_IPV6_SRC
)
3955 TCA_TUNNEL_KEY_ENC_IPV6_SRC
,
3956 sizeof(encap
.vxlan
->ipv6
.src
),
3957 &encap
.vxlan
->ipv6
.src
);
3958 if (encap
.vxlan
->mask
& FLOW_TCF_ENCAP_IPV6_DST
)
3960 TCA_TUNNEL_KEY_ENC_IPV6_DST
,
3961 sizeof(encap
.vxlan
->ipv6
.dst
),
3962 &encap
.vxlan
->ipv6
.dst
);
3963 if (encap
.vxlan
->mask
& FLOW_TCF_ENCAP_IP_TTL
)
3964 mnl_attr_put_u8(nlh
,
3965 TCA_TUNNEL_KEY_ENC_TTL
,
3966 encap
.vxlan
->ip_ttl_hop
);
3967 if (encap
.vxlan
->mask
& FLOW_TCF_ENCAP_IP_TOS
)
3968 mnl_attr_put_u8(nlh
,
3969 TCA_TUNNEL_KEY_ENC_TOS
,
3970 encap
.vxlan
->ip_tos
);
3971 if (encap
.vxlan
->mask
& FLOW_TCF_ENCAP_VXLAN_VNI
)
3972 mnl_attr_put_u32(nlh
,
3973 TCA_TUNNEL_KEY_ENC_KEY_ID
,
3975 (encap
.vxlan
->vxlan
.vni
));
3976 mnl_attr_put_u8(nlh
, TCA_TUNNEL_KEY_NO_CSUM
, 0);
3977 mnl_attr_nest_end(nlh
, na_act
);
3978 mnl_attr_nest_end(nlh
, na_act_index
);
3979 assert(dev_flow
->tcf
.nlsize
>= nlh
->nlmsg_len
);
3981 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
:
3982 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
:
3983 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
:
3984 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
:
3985 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC
:
3986 case RTE_FLOW_ACTION_TYPE_SET_TP_DST
:
3987 case RTE_FLOW_ACTION_TYPE_SET_TTL
:
3988 case RTE_FLOW_ACTION_TYPE_DEC_TTL
:
3989 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC
:
3990 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST
:
3992 mnl_attr_nest_start(nlh
, na_act_index_cur
++);
3993 flow_tcf_create_pedit_mnl_msg(nlh
,
3994 &actions
, item_flags
);
3995 mnl_attr_nest_end(nlh
, na_act_index
);
3998 return rte_flow_error_set(error
, ENOTSUP
,
3999 RTE_FLOW_ERROR_TYPE_ACTION
,
4001 "action not supported");
4005 assert(na_flower_act
);
4006 mnl_attr_nest_end(nlh
, na_flower_act
);
4007 dev_flow
->tcf
.ptc_flags
= mnl_attr_get_payload
4008 (mnl_nlmsg_get_payload_tail(nlh
));
4009 mnl_attr_put_u32(nlh
, TCA_FLOWER_FLAGS
, decap
.vxlan
?
4010 0 : TCA_CLS_FLAGS_SKIP_SW
);
4011 mnl_attr_nest_end(nlh
, na_flower
);
4012 if (dev_flow
->tcf
.tunnel
&& dev_flow
->tcf
.tunnel
->ifindex_ptr
)
4013 dev_flow
->tcf
.tunnel
->ifindex_org
=
4014 *dev_flow
->tcf
.tunnel
->ifindex_ptr
;
4015 assert(dev_flow
->tcf
.nlsize
>= nlh
->nlmsg_len
);
4020 * Send Netlink message with acknowledgment.
4023 * Flow context to use.
4025 * Message to send. This function always raises the NLM_F_ACK flag before
4028 * Callback handler for received message.
4030 * Context pointer for callback handler.
4033 * 0 on success, a negative errno value otherwise and rte_errno is set.
4036 flow_tcf_nl_ack(struct mlx5_flow_tcf_context
*tcf
,
4037 struct nlmsghdr
*nlh
,
4038 mnl_cb_t cb
, void *arg
)
4040 unsigned int portid
= mnl_socket_get_portid(tcf
->nl
);
4041 uint32_t seq
= tcf
->seq
++;
4047 /* seq 0 is reserved for kernel event-driven notifications. */
4050 nlh
->nlmsg_seq
= seq
;
4051 nlh
->nlmsg_flags
|= NLM_F_ACK
;
4052 ret
= mnl_socket_sendto(tcf
->nl
, nlh
, nlh
->nlmsg_len
);
4054 /* Message send error occurred. */
4058 nlh
= (struct nlmsghdr
*)(tcf
->buf
);
4060 * The following loop postpones non-fatal errors until multipart
4061 * messages are complete.
4064 ret
= mnl_socket_recvfrom(tcf
->nl
, tcf
->buf
, tcf
->buf_size
);
4068 * In case of overflow Will receive till
4069 * end of multipart message. We may lost part
4070 * of reply messages but mark and return an error.
4072 if (err
!= ENOSPC
||
4073 !(nlh
->nlmsg_flags
& NLM_F_MULTI
) ||
4074 nlh
->nlmsg_type
== NLMSG_DONE
)
4077 ret
= mnl_cb_run(nlh
, ret
, seq
, portid
, cb
, arg
);
4080 * libmnl returns 0 if DONE or
4081 * success ACK message found.
4087 * ACK message with error found
4088 * or some error occurred.
4093 /* We should continue receiving. */
/* Extra headroom kept when sizing Netlink request buffers. */
#define MNL_BUF_EXTRA_SPACE 16
/* Lower/upper bounds for a single Netlink request buffer. */
#define MNL_REQUEST_SIZE_MIN 256
#define MNL_REQUEST_SIZE_MAX 2048
/* Request size: one memory page, clamped to [MIN, MAX]. */
#define MNL_REQUEST_SIZE RTE_MIN(RTE_MAX(sysconf(_SC_PAGESIZE), \
				 MNL_REQUEST_SIZE_MIN), MNL_REQUEST_SIZE_MAX)
4108 /* Data structures used by flow_tcf_xxx_cb() routines. */
4109 struct tcf_nlcb_buf
{
4110 LIST_ENTRY(tcf_nlcb_buf
) next
;
4112 alignas(struct nlmsghdr
)
4113 uint8_t msg
[]; /**< Netlink message data. */
/* Callback context for the leftover-rule collection routines. */
struct tcf_nlcb_context {
	unsigned int ifindex; /**< Base interface index. */
	uint32_t bufsize; /**< Size of each command buffer to allocate. */
	LIST_HEAD(, tcf_nlcb_buf) nlbuf; /**< Command buffer list. */
};
4123 * Allocate space for netlink command in buffer list
4125 * @param[in, out] ctx
4126 * Pointer to callback context with command buffers list.
4128 * Required size of data buffer to be allocated.
4131 * Pointer to allocated memory, aligned as message header.
4132 * NULL if some error occurred.
4134 static struct nlmsghdr
*
4135 flow_tcf_alloc_nlcmd(struct tcf_nlcb_context
*ctx
, uint32_t size
)
4137 struct tcf_nlcb_buf
*buf
;
4138 struct nlmsghdr
*nlh
;
4140 size
= NLMSG_ALIGN(size
);
4141 buf
= LIST_FIRST(&ctx
->nlbuf
);
4142 if (buf
&& (buf
->size
+ size
) <= ctx
->bufsize
) {
4143 nlh
= (struct nlmsghdr
*)&buf
->msg
[buf
->size
];
4147 if (size
> ctx
->bufsize
) {
4148 DRV_LOG(WARNING
, "netlink: too long command buffer requested");
4151 buf
= rte_malloc(__func__
,
4152 ctx
->bufsize
+ sizeof(struct tcf_nlcb_buf
),
4153 alignof(struct tcf_nlcb_buf
));
4155 DRV_LOG(WARNING
, "netlink: no memory for command buffer");
4158 LIST_INSERT_HEAD(&ctx
->nlbuf
, buf
, next
);
4160 nlh
= (struct nlmsghdr
*)&buf
->msg
[0];
4165 * Send the buffers with prepared netlink commands. Scans the list and
4166 * sends all found buffers. Buffers are sent and freed anyway in order
4167 * to prevent memory leakage if some every message in received packet.
4170 * Context object initialized by mlx5_flow_tcf_context_create().
4171 * @param[in, out] ctx
4172 * Pointer to callback context with command buffers list.
4175 * Zero value on success, negative errno value otherwise
4176 * and rte_errno is set.
4179 flow_tcf_send_nlcmd(struct mlx5_flow_tcf_context
*tcf
,
4180 struct tcf_nlcb_context
*ctx
)
4182 struct tcf_nlcb_buf
*bc
= LIST_FIRST(&ctx
->nlbuf
);
4186 struct tcf_nlcb_buf
*bn
= LIST_NEXT(bc
, next
);
4187 struct nlmsghdr
*nlh
;
4191 while (msg
< bc
->size
) {
4193 * Send Netlink commands from buffer in one by one
4194 * fashion. If we send multiple rule deletion commands
4195 * in one Netlink message and some error occurs it may
4196 * cause multiple ACK error messages and break sequence
4197 * numbers of Netlink communication, because we expect
4198 * the only one ACK reply.
4200 assert((bc
->size
- msg
) >= sizeof(struct nlmsghdr
));
4201 nlh
= (struct nlmsghdr
*)&bc
->msg
[msg
];
4202 assert((bc
->size
- msg
) >= nlh
->nlmsg_len
);
4203 msg
+= nlh
->nlmsg_len
;
4204 rc
= flow_tcf_nl_ack(tcf
, nlh
, NULL
, NULL
);
4207 "netlink: cleanup error %d", rc
);
4215 LIST_INIT(&ctx
->nlbuf
);
4220 * Collect local IP address rules with scope link attribute on specified
4221 * network device. This is callback routine called by libmnl mnl_cb_run()
4222 * in loop for every message in received packet.
4225 * Pointer to reply header.
4226 * @param[in, out] arg
4227 * Opaque data pointer for this callback.
4230 * A positive, nonzero value on success, negative errno value otherwise
4231 * and rte_errno is set.
4234 flow_tcf_collect_local_cb(const struct nlmsghdr
*nlh
, void *arg
)
4236 struct tcf_nlcb_context
*ctx
= arg
;
4237 struct nlmsghdr
*cmd
;
4238 struct ifaddrmsg
*ifa
;
4240 struct nlattr
*na_local
= NULL
;
4241 struct nlattr
*na_peer
= NULL
;
4242 unsigned char family
;
4245 if (nlh
->nlmsg_type
!= RTM_NEWADDR
) {
4249 ifa
= mnl_nlmsg_get_payload(nlh
);
4250 family
= ifa
->ifa_family
;
4251 if (ifa
->ifa_index
!= ctx
->ifindex
||
4252 ifa
->ifa_scope
!= RT_SCOPE_LINK
||
4253 !(ifa
->ifa_flags
& IFA_F_PERMANENT
) ||
4254 (family
!= AF_INET
&& family
!= AF_INET6
))
4256 mnl_attr_for_each(na
, nlh
, sizeof(*ifa
)) {
4257 switch (mnl_attr_get_type(na
)) {
4265 if (na_local
&& na_peer
)
4268 if (!na_local
|| !na_peer
)
4270 /* Local rule found with scope link, permanent and assigned peer. */
4271 size
= MNL_ALIGN(sizeof(struct nlmsghdr
)) +
4272 MNL_ALIGN(sizeof(struct ifaddrmsg
)) +
4273 (family
== AF_INET6
? 2 * SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN
)
4274 : 2 * SZ_NLATTR_TYPE_OF(uint32_t));
4275 cmd
= flow_tcf_alloc_nlcmd(ctx
, size
);
4280 cmd
= mnl_nlmsg_put_header(cmd
);
4281 cmd
->nlmsg_type
= RTM_DELADDR
;
4282 cmd
->nlmsg_flags
= NLM_F_REQUEST
;
4283 ifa
= mnl_nlmsg_put_extra_header(cmd
, sizeof(*ifa
));
4284 ifa
->ifa_flags
= IFA_F_PERMANENT
;
4285 ifa
->ifa_scope
= RT_SCOPE_LINK
;
4286 ifa
->ifa_index
= ctx
->ifindex
;
4287 if (family
== AF_INET
) {
4288 ifa
->ifa_family
= AF_INET
;
4289 ifa
->ifa_prefixlen
= 32;
4290 mnl_attr_put_u32(cmd
, IFA_LOCAL
, mnl_attr_get_u32(na_local
));
4291 mnl_attr_put_u32(cmd
, IFA_ADDRESS
, mnl_attr_get_u32(na_peer
));
4293 ifa
->ifa_family
= AF_INET6
;
4294 ifa
->ifa_prefixlen
= 128;
4295 mnl_attr_put(cmd
, IFA_LOCAL
, IPV6_ADDR_LEN
,
4296 mnl_attr_get_payload(na_local
));
4297 mnl_attr_put(cmd
, IFA_ADDRESS
, IPV6_ADDR_LEN
,
4298 mnl_attr_get_payload(na_peer
));
4300 assert(size
== cmd
->nlmsg_len
);
4305 * Cleanup the local IP addresses on outer interface.
4308 * Context object initialized by mlx5_flow_tcf_context_create().
4309 * @param[in] ifindex
4310 * Network interface index to perform cleanup.
4313 flow_tcf_encap_local_cleanup(struct mlx5_flow_tcf_context
*tcf
,
4314 unsigned int ifindex
)
4316 struct nlmsghdr
*nlh
;
4317 struct ifaddrmsg
*ifa
;
4318 struct tcf_nlcb_context ctx
= {
4320 .bufsize
= MNL_REQUEST_SIZE
,
4321 .nlbuf
= LIST_HEAD_INITIALIZER(),
4327 * Seek and destroy leftovers of local IP addresses with
4328 * matching properties "scope link".
4330 nlh
= mnl_nlmsg_put_header(tcf
->buf
);
4331 nlh
->nlmsg_type
= RTM_GETADDR
;
4332 nlh
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
;
4333 ifa
= mnl_nlmsg_put_extra_header(nlh
, sizeof(*ifa
));
4334 ifa
->ifa_family
= AF_UNSPEC
;
4335 ifa
->ifa_index
= ifindex
;
4336 ifa
->ifa_scope
= RT_SCOPE_LINK
;
4337 ret
= flow_tcf_nl_ack(tcf
, nlh
, flow_tcf_collect_local_cb
, &ctx
);
4339 DRV_LOG(WARNING
, "netlink: query device list error %d", ret
);
4340 ret
= flow_tcf_send_nlcmd(tcf
, &ctx
);
4342 DRV_LOG(WARNING
, "netlink: device delete error %d", ret
);
4346 * Collect neigh permanent rules on specified network device.
4347 * This is callback routine called by libmnl mnl_cb_run() in loop for
4348 * every message in received packet.
4351 * Pointer to reply header.
4352 * @param[in, out] arg
4353 * Opaque data pointer for this callback.
4356 * A positive, nonzero value on success, negative errno value otherwise
4357 * and rte_errno is set.
4360 flow_tcf_collect_neigh_cb(const struct nlmsghdr
*nlh
, void *arg
)
4362 struct tcf_nlcb_context
*ctx
= arg
;
4363 struct nlmsghdr
*cmd
;
4366 struct nlattr
*na_ip
= NULL
;
4367 struct nlattr
*na_mac
= NULL
;
4368 unsigned char family
;
4371 if (nlh
->nlmsg_type
!= RTM_NEWNEIGH
) {
4375 ndm
= mnl_nlmsg_get_payload(nlh
);
4376 family
= ndm
->ndm_family
;
4377 if (ndm
->ndm_ifindex
!= (int)ctx
->ifindex
||
4378 !(ndm
->ndm_state
& NUD_PERMANENT
) ||
4379 (family
!= AF_INET
&& family
!= AF_INET6
))
4381 mnl_attr_for_each(na
, nlh
, sizeof(*ndm
)) {
4382 switch (mnl_attr_get_type(na
)) {
4390 if (na_mac
&& na_ip
)
4393 if (!na_mac
|| !na_ip
)
4395 /* Neigh rule with permanent attribute found. */
4396 size
= MNL_ALIGN(sizeof(struct nlmsghdr
)) +
4397 MNL_ALIGN(sizeof(struct ndmsg
)) +
4398 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN
) +
4399 (family
== AF_INET6
? SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN
)
4400 : SZ_NLATTR_TYPE_OF(uint32_t));
4401 cmd
= flow_tcf_alloc_nlcmd(ctx
, size
);
4406 cmd
= mnl_nlmsg_put_header(cmd
);
4407 cmd
->nlmsg_type
= RTM_DELNEIGH
;
4408 cmd
->nlmsg_flags
= NLM_F_REQUEST
;
4409 ndm
= mnl_nlmsg_put_extra_header(cmd
, sizeof(*ndm
));
4410 ndm
->ndm_ifindex
= ctx
->ifindex
;
4411 ndm
->ndm_state
= NUD_PERMANENT
;
4414 if (family
== AF_INET
) {
4415 ndm
->ndm_family
= AF_INET
;
4416 mnl_attr_put_u32(cmd
, NDA_DST
, mnl_attr_get_u32(na_ip
));
4418 ndm
->ndm_family
= AF_INET6
;
4419 mnl_attr_put(cmd
, NDA_DST
, IPV6_ADDR_LEN
,
4420 mnl_attr_get_payload(na_ip
));
4422 mnl_attr_put(cmd
, NDA_LLADDR
, ETHER_ADDR_LEN
,
4423 mnl_attr_get_payload(na_mac
));
4424 assert(size
== cmd
->nlmsg_len
);
4429 * Cleanup the neigh rules on outer interface.
4432 * Context object initialized by mlx5_flow_tcf_context_create().
4433 * @param[in] ifindex
4434 * Network interface index to perform cleanup.
4437 flow_tcf_encap_neigh_cleanup(struct mlx5_flow_tcf_context
*tcf
,
4438 unsigned int ifindex
)
4440 struct nlmsghdr
*nlh
;
4442 struct tcf_nlcb_context ctx
= {
4444 .bufsize
= MNL_REQUEST_SIZE
,
4445 .nlbuf
= LIST_HEAD_INITIALIZER(),
4450 /* Seek and destroy leftovers of neigh rules. */
4451 nlh
= mnl_nlmsg_put_header(tcf
->buf
);
4452 nlh
->nlmsg_type
= RTM_GETNEIGH
;
4453 nlh
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
;
4454 ndm
= mnl_nlmsg_put_extra_header(nlh
, sizeof(*ndm
));
4455 ndm
->ndm_family
= AF_UNSPEC
;
4456 ndm
->ndm_ifindex
= ifindex
;
4457 ndm
->ndm_state
= NUD_PERMANENT
;
4458 ret
= flow_tcf_nl_ack(tcf
, nlh
, flow_tcf_collect_neigh_cb
, &ctx
);
4460 DRV_LOG(WARNING
, "netlink: query device list error %d", ret
);
4461 ret
= flow_tcf_send_nlcmd(tcf
, &ctx
);
4463 DRV_LOG(WARNING
, "netlink: device delete error %d", ret
);
4467 * Collect indices of VXLAN encap/decap interfaces associated with device.
4468 * This is callback routine called by libmnl mnl_cb_run() in loop for
4469 * every message in received packet.
4472 * Pointer to reply header.
4473 * @param[in, out] arg
4474 * Opaque data pointer for this callback.
4477 * A positive, nonzero value on success, negative errno value otherwise
4478 * and rte_errno is set.
4481 flow_tcf_collect_vxlan_cb(const struct nlmsghdr
*nlh
, void *arg
)
4483 struct tcf_nlcb_context
*ctx
= arg
;
4484 struct nlmsghdr
*cmd
;
4485 struct ifinfomsg
*ifm
;
4487 struct nlattr
*na_info
= NULL
;
4488 struct nlattr
*na_vxlan
= NULL
;
4490 unsigned int vxindex
;
4493 if (nlh
->nlmsg_type
!= RTM_NEWLINK
) {
4497 ifm
= mnl_nlmsg_get_payload(nlh
);
4498 if (!ifm
->ifi_index
) {
4502 mnl_attr_for_each(na
, nlh
, sizeof(*ifm
))
4503 if (mnl_attr_get_type(na
) == IFLA_LINKINFO
) {
4509 mnl_attr_for_each_nested(na
, na_info
) {
4510 switch (mnl_attr_get_type(na
)) {
4511 case IFLA_INFO_KIND
:
4512 if (!strncmp("vxlan", mnl_attr_get_str(na
),
4513 mnl_attr_get_len(na
)))
4516 case IFLA_INFO_DATA
:
4520 if (found
&& na_vxlan
)
4523 if (!found
|| !na_vxlan
)
4526 mnl_attr_for_each_nested(na
, na_vxlan
) {
4527 if (mnl_attr_get_type(na
) == IFLA_VXLAN_LINK
&&
4528 mnl_attr_get_u32(na
) == ctx
->ifindex
) {
4535 /* Attached VXLAN device found, store the command to delete. */
4536 vxindex
= ifm
->ifi_index
;
4537 size
= MNL_ALIGN(sizeof(struct nlmsghdr
)) +
4538 MNL_ALIGN(sizeof(struct ifinfomsg
));
4539 cmd
= flow_tcf_alloc_nlcmd(ctx
, size
);
4544 cmd
= mnl_nlmsg_put_header(cmd
);
4545 cmd
->nlmsg_type
= RTM_DELLINK
;
4546 cmd
->nlmsg_flags
= NLM_F_REQUEST
;
4547 ifm
= mnl_nlmsg_put_extra_header(cmd
, sizeof(*ifm
));
4548 ifm
->ifi_family
= AF_UNSPEC
;
4549 ifm
->ifi_index
= vxindex
;
4550 assert(size
== cmd
->nlmsg_len
);
4555 * Cleanup the outer interface. Removes all found vxlan devices
4556 * attached to specified index, flushes the neigh and local IP
4560 * Context object initialized by mlx5_flow_tcf_context_create().
4561 * @param[in] ifindex
4562 * Network inferface index to perform cleanup.
4565 flow_tcf_encap_iface_cleanup(struct mlx5_flow_tcf_context
*tcf
,
4566 unsigned int ifindex
)
4568 struct nlmsghdr
*nlh
;
4569 struct ifinfomsg
*ifm
;
4570 struct tcf_nlcb_context ctx
= {
4572 .bufsize
= MNL_REQUEST_SIZE
,
4573 .nlbuf
= LIST_HEAD_INITIALIZER(),
4579 * Seek and destroy leftover VXLAN encap/decap interfaces with
4580 * matching properties.
4582 nlh
= mnl_nlmsg_put_header(tcf
->buf
);
4583 nlh
->nlmsg_type
= RTM_GETLINK
;
4584 nlh
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
;
4585 ifm
= mnl_nlmsg_put_extra_header(nlh
, sizeof(*ifm
));
4586 ifm
->ifi_family
= AF_UNSPEC
;
4587 ret
= flow_tcf_nl_ack(tcf
, nlh
, flow_tcf_collect_vxlan_cb
, &ctx
);
4589 DRV_LOG(WARNING
, "netlink: query device list error %d", ret
);
4590 ret
= flow_tcf_send_nlcmd(tcf
, &ctx
);
4592 DRV_LOG(WARNING
, "netlink: device delete error %d", ret
);
4596 * Emit Netlink message to add/remove local address to the outer device.
4597 * The address being added is visible within the link only (scope link).
4599 * Note that an implicit route is maintained by the kernel due to the
4600 * presence of a peer address (IFA_ADDRESS).
4602 * These rules are used for encapsulation only and allow to assign
4603 * the outer tunnel source IP address.
4606 * Libmnl socket context object.
4608 * Encapsulation properties (source address and its peer).
4609 * @param[in] ifindex
4610 * Network interface to apply rule.
4612 * Toggle between add and remove.
4614 * Perform verbose error reporting if not NULL.
4617 * 0 on success, a negative errno value otherwise and rte_errno is set.
4620 flow_tcf_rule_local(struct mlx5_flow_tcf_context
*tcf
,
4621 const struct flow_tcf_vxlan_encap
*encap
,
4622 unsigned int ifindex
,
4624 struct rte_flow_error
*error
)
4626 struct nlmsghdr
*nlh
;
4627 struct ifaddrmsg
*ifa
;
4628 alignas(struct nlmsghdr
)
4629 uint8_t buf
[mnl_nlmsg_size(sizeof(*ifa
) + 128)];
4631 nlh
= mnl_nlmsg_put_header(buf
);
4632 nlh
->nlmsg_type
= enable
? RTM_NEWADDR
: RTM_DELADDR
;
4634 NLM_F_REQUEST
| (enable
? NLM_F_CREATE
| NLM_F_REPLACE
: 0);
4636 ifa
= mnl_nlmsg_put_extra_header(nlh
, sizeof(*ifa
));
4637 ifa
->ifa_flags
= IFA_F_PERMANENT
;
4638 ifa
->ifa_scope
= RT_SCOPE_LINK
;
4639 ifa
->ifa_index
= ifindex
;
4640 if (encap
->mask
& FLOW_TCF_ENCAP_IPV4_SRC
) {
4641 ifa
->ifa_family
= AF_INET
;
4642 ifa
->ifa_prefixlen
= 32;
4643 mnl_attr_put_u32(nlh
, IFA_LOCAL
, encap
->ipv4
.src
);
4644 if (encap
->mask
& FLOW_TCF_ENCAP_IPV4_DST
)
4645 mnl_attr_put_u32(nlh
, IFA_ADDRESS
,
4648 assert(encap
->mask
& FLOW_TCF_ENCAP_IPV6_SRC
);
4649 ifa
->ifa_family
= AF_INET6
;
4650 ifa
->ifa_prefixlen
= 128;
4651 mnl_attr_put(nlh
, IFA_LOCAL
,
4652 sizeof(encap
->ipv6
.src
),
4654 if (encap
->mask
& FLOW_TCF_ENCAP_IPV6_DST
)
4655 mnl_attr_put(nlh
, IFA_ADDRESS
,
4656 sizeof(encap
->ipv6
.dst
),
4659 if (!flow_tcf_nl_ack(tcf
, nlh
, NULL
, NULL
))
4661 return rte_flow_error_set(error
, rte_errno
,
4662 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
4663 "netlink: cannot complete IFA request"
4668 * Emit Netlink message to add/remove neighbor.
4671 * Libmnl socket context object.
4673 * Encapsulation properties (destination address).
4674 * @param[in] ifindex
4675 * Network interface.
4677 * Toggle between add and remove.
4679 * Perform verbose error reporting if not NULL.
4682 * 0 on success, a negative errno value otherwise and rte_errno is set.
4685 flow_tcf_rule_neigh(struct mlx5_flow_tcf_context
*tcf
,
4686 const struct flow_tcf_vxlan_encap
*encap
,
4687 unsigned int ifindex
,
4689 struct rte_flow_error
*error
)
4691 struct nlmsghdr
*nlh
;
4693 alignas(struct nlmsghdr
)
4694 uint8_t buf
[mnl_nlmsg_size(sizeof(*ndm
) + 128)];
4696 nlh
= mnl_nlmsg_put_header(buf
);
4697 nlh
->nlmsg_type
= enable
? RTM_NEWNEIGH
: RTM_DELNEIGH
;
4699 NLM_F_REQUEST
| (enable
? NLM_F_CREATE
| NLM_F_REPLACE
: 0);
4701 ndm
= mnl_nlmsg_put_extra_header(nlh
, sizeof(*ndm
));
4702 ndm
->ndm_ifindex
= ifindex
;
4703 ndm
->ndm_state
= NUD_PERMANENT
;
4706 if (encap
->mask
& FLOW_TCF_ENCAP_IPV4_DST
) {
4707 ndm
->ndm_family
= AF_INET
;
4708 mnl_attr_put_u32(nlh
, NDA_DST
, encap
->ipv4
.dst
);
4710 assert(encap
->mask
& FLOW_TCF_ENCAP_IPV6_DST
);
4711 ndm
->ndm_family
= AF_INET6
;
4712 mnl_attr_put(nlh
, NDA_DST
, sizeof(encap
->ipv6
.dst
),
4715 if (encap
->mask
& FLOW_TCF_ENCAP_ETH_SRC
&& enable
)
4717 "outer ethernet source address cannot be "
4718 "forced for VXLAN encapsulation");
4719 if (encap
->mask
& FLOW_TCF_ENCAP_ETH_DST
)
4720 mnl_attr_put(nlh
, NDA_LLADDR
, sizeof(encap
->eth
.dst
),
4722 if (!flow_tcf_nl_ack(tcf
, nlh
, NULL
, NULL
))
4724 return rte_flow_error_set(error
, rte_errno
,
4725 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
4726 "netlink: cannot complete ND request"
4731 * Manage the local IP addresses and their peers IP addresses on the
4732 * outer interface for encapsulation purposes. The kernel searches the
4733 * appropriate device for tunnel egress traffic using the outer source
4734 * IP, this IP should be assigned to the outer network device, otherwise
4735 * kernel rejects the rule.
4737 * Adds or removes the addresses using the Netlink command like this:
4738 * ip addr add <src_ip> peer <dst_ip> scope link dev <ifouter>
4740 * The addresses are local to the netdev ("scope link"), this reduces
4741 * the risk of conflicts. Note that an implicit route is maintained by
4742 * the kernel due to the presence of a peer address (IFA_ADDRESS).
4745 * Libmnl socket context object.
4747 * Object, contains rule database and ifouter index.
4748 * @param[in] dev_flow
4749 * Flow object, contains the tunnel parameters (for encap only).
4751 * Toggle between add and remove.
4753 * Perform verbose error reporting if not NULL.
4756 * 0 on success, a negative errno value otherwise and rte_errno is set.
4759 flow_tcf_encap_local(struct mlx5_flow_tcf_context
*tcf
,
4760 struct tcf_irule
*iface
,
4761 struct mlx5_flow
*dev_flow
,
4763 struct rte_flow_error
*error
)
4765 const struct flow_tcf_vxlan_encap
*encap
= dev_flow
->tcf
.vxlan_encap
;
4766 struct tcf_local_rule
*rule
= NULL
;
4770 assert(encap
->hdr
.type
== FLOW_TCF_TUNACT_VXLAN_ENCAP
);
4771 if (encap
->mask
& FLOW_TCF_ENCAP_IPV4_SRC
) {
4772 assert(encap
->mask
& FLOW_TCF_ENCAP_IPV4_DST
);
4773 LIST_FOREACH(rule
, &iface
->local
, next
) {
4774 if (rule
->mask
& FLOW_TCF_ENCAP_IPV4_SRC
&&
4775 encap
->ipv4
.src
== rule
->ipv4
.src
&&
4776 encap
->ipv4
.dst
== rule
->ipv4
.dst
) {
4781 assert(encap
->mask
& FLOW_TCF_ENCAP_IPV6_SRC
);
4782 assert(encap
->mask
& FLOW_TCF_ENCAP_IPV6_DST
);
4783 LIST_FOREACH(rule
, &iface
->local
, next
) {
4784 if (rule
->mask
& FLOW_TCF_ENCAP_IPV6_SRC
&&
4785 !memcmp(&encap
->ipv6
.src
, &rule
->ipv6
.src
,
4786 sizeof(encap
->ipv6
.src
)) &&
4787 !memcmp(&encap
->ipv6
.dst
, &rule
->ipv6
.dst
,
4788 sizeof(encap
->ipv6
.dst
))) {
4798 if (!rule
->refcnt
|| !--rule
->refcnt
) {
4799 LIST_REMOVE(rule
, next
);
4800 return flow_tcf_rule_local(tcf
, encap
,
4801 iface
->ifouter
, false, error
);
4806 DRV_LOG(WARNING
, "disabling not existing local rule");
4807 rte_flow_error_set(error
, ENOENT
,
4808 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
4809 "disabling not existing local rule");
4812 rule
= rte_zmalloc(__func__
, sizeof(struct tcf_local_rule
),
4813 alignof(struct tcf_local_rule
));
4815 rte_flow_error_set(error
, ENOMEM
,
4816 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
4817 "unable to allocate memory for local rule");
4820 *rule
= (struct tcf_local_rule
){.refcnt
= 0,
4823 if (encap
->mask
& FLOW_TCF_ENCAP_IPV4_SRC
) {
4824 rule
->mask
= FLOW_TCF_ENCAP_IPV4_SRC
4825 | FLOW_TCF_ENCAP_IPV4_DST
;
4826 rule
->ipv4
.src
= encap
->ipv4
.src
;
4827 rule
->ipv4
.dst
= encap
->ipv4
.dst
;
4829 rule
->mask
= FLOW_TCF_ENCAP_IPV6_SRC
4830 | FLOW_TCF_ENCAP_IPV6_DST
;
4831 memcpy(&rule
->ipv6
.src
, &encap
->ipv6
.src
, IPV6_ADDR_LEN
);
4832 memcpy(&rule
->ipv6
.dst
, &encap
->ipv6
.dst
, IPV6_ADDR_LEN
);
4834 ret
= flow_tcf_rule_local(tcf
, encap
, iface
->ifouter
, true, error
);
4840 LIST_INSERT_HEAD(&iface
->local
, rule
, next
);
4845 * Manage the destination MAC/IP addresses neigh database, kernel uses
4846 * this one to determine the destination MAC address within encapsulation
4847 * header. Adds or removes the entries using the Netlink command like this:
4848 * ip neigh add dev <ifouter> lladdr <dst_mac> to <dst_ip> nud permanent
4851 * Libmnl socket context object.
4853 * Object, contains rule database and ifouter index.
4854 * @param[in] dev_flow
4855 * Flow object, contains the tunnel parameters (for encap only).
4857 * Toggle between add and remove.
4859 * Perform verbose error reporting if not NULL.
4862 * 0 on success, a negative errno value otherwise and rte_errno is set.
4865 flow_tcf_encap_neigh(struct mlx5_flow_tcf_context
*tcf
,
4866 struct tcf_irule
*iface
,
4867 struct mlx5_flow
*dev_flow
,
4869 struct rte_flow_error
*error
)
4871 const struct flow_tcf_vxlan_encap
*encap
= dev_flow
->tcf
.vxlan_encap
;
4872 struct tcf_neigh_rule
*rule
= NULL
;
4876 assert(encap
->hdr
.type
== FLOW_TCF_TUNACT_VXLAN_ENCAP
);
4877 if (encap
->mask
& FLOW_TCF_ENCAP_IPV4_DST
) {
4878 assert(encap
->mask
& FLOW_TCF_ENCAP_IPV4_SRC
);
4879 LIST_FOREACH(rule
, &iface
->neigh
, next
) {
4880 if (rule
->mask
& FLOW_TCF_ENCAP_IPV4_DST
&&
4881 encap
->ipv4
.dst
== rule
->ipv4
.dst
) {
4886 assert(encap
->mask
& FLOW_TCF_ENCAP_IPV6_SRC
);
4887 assert(encap
->mask
& FLOW_TCF_ENCAP_IPV6_DST
);
4888 LIST_FOREACH(rule
, &iface
->neigh
, next
) {
4889 if (rule
->mask
& FLOW_TCF_ENCAP_IPV6_DST
&&
4890 !memcmp(&encap
->ipv6
.dst
, &rule
->ipv6
.dst
,
4891 sizeof(encap
->ipv6
.dst
))) {
4897 if (memcmp(&encap
->eth
.dst
, &rule
->eth
,
4898 sizeof(encap
->eth
.dst
))) {
4899 DRV_LOG(WARNING
, "Destination MAC differs"
4901 rte_flow_error_set(error
, EEXIST
,
4902 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
,
4903 NULL
, "Different MAC address"
4904 " neigh rule for the same"
4912 if (!rule
->refcnt
|| !--rule
->refcnt
) {
4913 LIST_REMOVE(rule
, next
);
4914 return flow_tcf_rule_neigh(tcf
, encap
,
4921 DRV_LOG(WARNING
, "Disabling not existing neigh rule");
4922 rte_flow_error_set(error
, ENOENT
,
4923 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
4924 "unable to allocate memory for neigh rule");
4927 rule
= rte_zmalloc(__func__
, sizeof(struct tcf_neigh_rule
),
4928 alignof(struct tcf_neigh_rule
));
4930 rte_flow_error_set(error
, ENOMEM
,
4931 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
4932 "unable to allocate memory for neigh rule");
4935 *rule
= (struct tcf_neigh_rule
){.refcnt
= 0,
4938 if (encap
->mask
& FLOW_TCF_ENCAP_IPV4_DST
) {
4939 rule
->mask
= FLOW_TCF_ENCAP_IPV4_DST
;
4940 rule
->ipv4
.dst
= encap
->ipv4
.dst
;
4942 rule
->mask
= FLOW_TCF_ENCAP_IPV6_DST
;
4943 memcpy(&rule
->ipv6
.dst
, &encap
->ipv6
.dst
, IPV6_ADDR_LEN
);
4945 memcpy(&rule
->eth
, &encap
->eth
.dst
, sizeof(rule
->eth
));
4946 ret
= flow_tcf_rule_neigh(tcf
, encap
, iface
->ifouter
, true, error
);
4952 LIST_INSERT_HEAD(&iface
->neigh
, rule
, next
);
4956 /* VXLAN encap rule database for outer interfaces. */
4957 static LIST_HEAD(, tcf_irule
) iface_list_vxlan
= LIST_HEAD_INITIALIZER();
4959 /* VTEP device list is shared between PMD port instances. */
4960 static LIST_HEAD(, tcf_vtep
) vtep_list_vxlan
= LIST_HEAD_INITIALIZER();
4961 static pthread_mutex_t vtep_list_mutex
= PTHREAD_MUTEX_INITIALIZER
;
4964 * Acquire the VXLAN encap rules container for specified interface.
4965 * First looks for the container in the existing ones list, creates
4966 * and initializes the new container if existing not found.
4969 * Context object initialized by mlx5_flow_tcf_context_create().
4970 * @param[in] ifouter
4971 * Network interface index to create VXLAN encap rules on.
4973 * Perform verbose error reporting if not NULL.
4975 * Rule container pointer on success,
4976 * NULL otherwise and rte_errno is set.
4978 static struct tcf_irule
*
4979 flow_tcf_encap_irule_acquire(struct mlx5_flow_tcf_context
*tcf
,
4980 unsigned int ifouter
,
4981 struct rte_flow_error
*error
)
4983 struct tcf_irule
*iface
;
4985 /* Look whether the container for encap rules is created. */
4987 LIST_FOREACH(iface
, &iface_list_vxlan
, next
) {
4988 if (iface
->ifouter
== ifouter
)
4992 /* Container already exists, just increment the reference. */
4996 /* Not found, we should create the new container. */
4997 iface
= rte_zmalloc(__func__
, sizeof(*iface
),
4998 alignof(struct tcf_irule
));
5000 rte_flow_error_set(error
, ENOMEM
,
5001 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
5002 "unable to allocate memory for container");
5005 *iface
= (struct tcf_irule
){
5006 .local
= LIST_HEAD_INITIALIZER(),
5007 .neigh
= LIST_HEAD_INITIALIZER(),
5011 /* Interface cleanup for new container created. */
5012 flow_tcf_encap_iface_cleanup(tcf
, ifouter
);
5013 flow_tcf_encap_local_cleanup(tcf
, ifouter
);
5014 flow_tcf_encap_neigh_cleanup(tcf
, ifouter
);
5015 LIST_INSERT_HEAD(&iface_list_vxlan
, iface
, next
);
5020 * Releases VXLAN encap rules container by pointer. Decrements the
5021 * reference counter and deletes the container if counter is zero.
5024 * VXLAN rule container pointer to release.
5027 flow_tcf_encap_irule_release(struct tcf_irule
*iface
)
5029 assert(iface
->refcnt
);
5030 if (--iface
->refcnt
== 0) {
5031 /* Reference counter is zero, delete the container. */
5032 assert(LIST_EMPTY(&iface
->local
));
5033 assert(LIST_EMPTY(&iface
->neigh
));
5034 LIST_REMOVE(iface
, next
);
5040 * Deletes VTEP network device.
5043 * Context object initialized by mlx5_flow_tcf_context_create().
5045 * Object representing the network device to delete. Memory
5046 * allocated for this object is freed by routine.
5049 flow_tcf_vtep_delete(struct mlx5_flow_tcf_context
*tcf
,
5050 struct tcf_vtep
*vtep
)
5052 struct nlmsghdr
*nlh
;
5053 struct ifinfomsg
*ifm
;
5054 alignas(struct nlmsghdr
)
5055 uint8_t buf
[mnl_nlmsg_size(MNL_ALIGN(sizeof(*ifm
))) +
5056 MNL_BUF_EXTRA_SPACE
];
5059 assert(!vtep
->refcnt
);
5060 /* Delete only ifaces those we actually created. */
5061 if (vtep
->created
&& vtep
->ifindex
) {
5062 DRV_LOG(INFO
, "VTEP delete (%d)", vtep
->ifindex
);
5063 nlh
= mnl_nlmsg_put_header(buf
);
5064 nlh
->nlmsg_type
= RTM_DELLINK
;
5065 nlh
->nlmsg_flags
= NLM_F_REQUEST
;
5066 ifm
= mnl_nlmsg_put_extra_header(nlh
, sizeof(*ifm
));
5067 ifm
->ifi_family
= AF_UNSPEC
;
5068 ifm
->ifi_index
= vtep
->ifindex
;
5069 assert(sizeof(buf
) >= nlh
->nlmsg_len
);
5070 ret
= flow_tcf_nl_ack(tcf
, nlh
, NULL
, NULL
);
5072 DRV_LOG(WARNING
, "netlink: error deleting vxlan"
5073 " encap/decap ifindex %u",
5080 * Creates VTEP network device.
5083 * Context object initialized by mlx5_flow_tcf_context_create().
5085 * UDP port of created VTEP device.
5087 * Perform verbose error reporting if not NULL.
5090 * Pointer to created device structure on success,
5091 * NULL otherwise and rte_errno is set.
5093 static struct tcf_vtep
*
5094 flow_tcf_vtep_create(struct mlx5_flow_tcf_context
*tcf
,
5095 uint16_t port
, struct rte_flow_error
*error
)
5097 struct tcf_vtep
*vtep
;
5098 struct nlmsghdr
*nlh
;
5099 struct ifinfomsg
*ifm
;
5100 char name
[sizeof(MLX5_VXLAN_DEVICE_PFX
) + 24];
5101 alignas(struct nlmsghdr
)
5102 uint8_t buf
[mnl_nlmsg_size(sizeof(*ifm
)) +
5103 SZ_NLATTR_DATA_OF(sizeof(name
)) +
5104 SZ_NLATTR_NEST
* 2 +
5105 SZ_NLATTR_STRZ_OF("vxlan") +
5106 SZ_NLATTR_DATA_OF(sizeof(uint32_t)) +
5107 SZ_NLATTR_DATA_OF(sizeof(uint16_t)) +
5108 SZ_NLATTR_DATA_OF(sizeof(uint8_t)) * 3 +
5109 MNL_BUF_EXTRA_SPACE
];
5110 struct nlattr
*na_info
;
5111 struct nlattr
*na_vxlan
;
5112 rte_be16_t vxlan_port
= rte_cpu_to_be_16(port
);
5115 vtep
= rte_zmalloc(__func__
, sizeof(*vtep
), alignof(struct tcf_vtep
));
5117 rte_flow_error_set(error
, ENOMEM
,
5118 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
5119 "unable to allocate memory for VTEP");
5122 *vtep
= (struct tcf_vtep
){
5125 memset(buf
, 0, sizeof(buf
));
5126 nlh
= mnl_nlmsg_put_header(buf
);
5127 nlh
->nlmsg_type
= RTM_NEWLINK
;
5128 nlh
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
5129 ifm
= mnl_nlmsg_put_extra_header(nlh
, sizeof(*ifm
));
5130 ifm
->ifi_family
= AF_UNSPEC
;
5133 ifm
->ifi_flags
= IFF_UP
;
5134 ifm
->ifi_change
= 0xffffffff;
5135 snprintf(name
, sizeof(name
), "%s%u", MLX5_VXLAN_DEVICE_PFX
, port
);
5136 mnl_attr_put_strz(nlh
, IFLA_IFNAME
, name
);
5137 na_info
= mnl_attr_nest_start(nlh
, IFLA_LINKINFO
);
5139 mnl_attr_put_strz(nlh
, IFLA_INFO_KIND
, "vxlan");
5140 na_vxlan
= mnl_attr_nest_start(nlh
, IFLA_INFO_DATA
);
5142 #ifdef HAVE_IFLA_VXLAN_COLLECT_METADATA
5144 * RH 7.2 does not support metadata for tunnel device.
5145 * It does not matter because we are going to use the
5146 * hardware offload by mlx5 driver.
5148 mnl_attr_put_u8(nlh
, IFLA_VXLAN_COLLECT_METADATA
, 1);
5150 mnl_attr_put_u8(nlh
, IFLA_VXLAN_UDP_ZERO_CSUM6_RX
, 1);
5151 mnl_attr_put_u8(nlh
, IFLA_VXLAN_LEARNING
, 0);
5152 mnl_attr_put_u16(nlh
, IFLA_VXLAN_PORT
, vxlan_port
);
5153 #ifndef HAVE_IFLA_VXLAN_COLLECT_METADATA
5155 * We must specify VNI explicitly if metadata not supported.
5156 * Note, VNI is transferred with native endianness format.
5158 mnl_attr_put_u16(nlh
, IFLA_VXLAN_ID
, MLX5_VXLAN_DEFAULT_VNI
);
5160 mnl_attr_nest_end(nlh
, na_vxlan
);
5161 mnl_attr_nest_end(nlh
, na_info
);
5162 assert(sizeof(buf
) >= nlh
->nlmsg_len
);
5163 ret
= flow_tcf_nl_ack(tcf
, nlh
, NULL
, NULL
);
5166 "netlink: VTEP %s create failure (%d)",
5168 if (rte_errno
!= EEXIST
)
5170 * Some unhandled error occurred or device is
5171 * for encapsulation and cannot be shared.
5176 * Mark device we actually created.
5177 * We should explicitly delete
5178 * when we do not need it anymore.
5183 /* Try to get ifindex of created of pre-existing device. */
5184 ret
= if_nametoindex(name
);
5187 "VTEP %s failed to get index (%d)", name
, errno
);
5190 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
5191 "netlink: failed to retrieve VTEP ifindex");
5194 vtep
->ifindex
= ret
;
5195 memset(buf
, 0, sizeof(buf
));
5196 nlh
= mnl_nlmsg_put_header(buf
);
5197 nlh
->nlmsg_type
= RTM_NEWLINK
;
5198 nlh
->nlmsg_flags
= NLM_F_REQUEST
;
5199 ifm
= mnl_nlmsg_put_extra_header(nlh
, sizeof(*ifm
));
5200 ifm
->ifi_family
= AF_UNSPEC
;
5202 ifm
->ifi_index
= vtep
->ifindex
;
5203 ifm
->ifi_flags
= IFF_UP
;
5204 ifm
->ifi_change
= IFF_UP
;
5205 ret
= flow_tcf_nl_ack(tcf
, nlh
, NULL
, NULL
);
5207 rte_flow_error_set(error
, -errno
,
5208 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
5209 "netlink: failed to set VTEP link up");
5210 DRV_LOG(WARNING
, "netlink: VTEP %s set link up failure (%d)",
5214 ret
= mlx5_flow_tcf_init(tcf
, vtep
->ifindex
, error
);
5216 DRV_LOG(WARNING
, "VTEP %s init failure (%d)", name
, rte_errno
);
5219 DRV_LOG(INFO
, "VTEP create (%d, %d)", vtep
->port
, vtep
->ifindex
);
5223 flow_tcf_vtep_delete(tcf
, vtep
);
5231 * Acquire target interface index for VXLAN tunneling decapsulation.
5232 * In order to share the UDP port within the other interfaces the
5233 * VXLAN device created as not attached to any interface (if created).
5236 * Context object initialized by mlx5_flow_tcf_context_create().
5237 * @param[in] dev_flow
5238 * Flow tcf object with tunnel structure pointer set.
5240 * Perform verbose error reporting if not NULL.
5242 * Interface descriptor pointer on success,
5243 * NULL otherwise and rte_errno is set.
5245 static struct tcf_vtep
*
5246 flow_tcf_decap_vtep_acquire(struct mlx5_flow_tcf_context
*tcf
,
5247 struct mlx5_flow
*dev_flow
,
5248 struct rte_flow_error
*error
)
5250 struct tcf_vtep
*vtep
;
5251 uint16_t port
= dev_flow
->tcf
.vxlan_decap
->udp_port
;
5253 LIST_FOREACH(vtep
, &vtep_list_vxlan
, next
) {
5254 if (vtep
->port
== port
)
5258 /* Device exists, just increment the reference counter. */
5260 assert(vtep
->ifindex
);
5263 /* No decapsulation device exists, try to create the new one. */
5264 vtep
= flow_tcf_vtep_create(tcf
, port
, error
);
5266 LIST_INSERT_HEAD(&vtep_list_vxlan
, vtep
, next
);
5271 * Acquire target interface index for VXLAN tunneling encapsulation.
5274 * Context object initialized by mlx5_flow_tcf_context_create().
5275 * @param[in] ifouter
5276 * Network interface index to attach VXLAN encap device to.
5277 * @param[in] dev_flow
5278 * Flow tcf object with tunnel structure pointer set.
5280 * Perform verbose error reporting if not NULL.
5282 * Interface descriptor pointer on success,
5283 * NULL otherwise and rte_errno is set.
5285 static struct tcf_vtep
*
5286 flow_tcf_encap_vtep_acquire(struct mlx5_flow_tcf_context
*tcf
,
5287 unsigned int ifouter
,
5288 struct mlx5_flow
*dev_flow
,
5289 struct rte_flow_error
*error
)
5291 static uint16_t port
;
5292 struct tcf_vtep
*vtep
;
5293 struct tcf_irule
*iface
;
5297 /* Look whether the VTEP for specified port is created. */
5298 port
= rte_be_to_cpu_16(dev_flow
->tcf
.vxlan_encap
->udp
.dst
);
5299 LIST_FOREACH(vtep
, &vtep_list_vxlan
, next
) {
5300 if (vtep
->port
== port
)
5304 /* VTEP already exists, just increment the reference. */
5307 /* Not found, we should create the new VTEP. */
5308 vtep
= flow_tcf_vtep_create(tcf
, port
, error
);
5311 LIST_INSERT_HEAD(&vtep_list_vxlan
, vtep
, next
);
5313 assert(vtep
->ifindex
);
5314 iface
= flow_tcf_encap_irule_acquire(tcf
, ifouter
, error
);
5316 if (--vtep
->refcnt
== 0)
5317 flow_tcf_vtep_delete(tcf
, vtep
);
5320 dev_flow
->tcf
.vxlan_encap
->iface
= iface
;
5321 /* Create local ipaddr with peer to specify the outer IPs. */
5322 ret
= flow_tcf_encap_local(tcf
, iface
, dev_flow
, true, error
);
5324 /* Create neigh rule to specify outer destination MAC. */
5325 ret
= flow_tcf_encap_neigh(tcf
, iface
, dev_flow
, true, error
);
5327 flow_tcf_encap_local(tcf
, iface
,
5328 dev_flow
, false, error
);
5331 dev_flow
->tcf
.vxlan_encap
->iface
= NULL
;
5332 flow_tcf_encap_irule_release(iface
);
5333 if (--vtep
->refcnt
== 0)
5334 flow_tcf_vtep_delete(tcf
, vtep
);
5341 * Acquires target interface index for tunneling of any type.
5342 * Creates the new VTEP if needed.
5345 * Context object initialized by mlx5_flow_tcf_context_create().
5346 * @param[in] ifouter
5347 * Network interface index to create VXLAN encap rules on.
5348 * @param[in] dev_flow
5349 * Flow tcf object with tunnel structure pointer set.
5351 * Perform verbose error reporting if not NULL.
5353 * Interface descriptor pointer on success,
5354 * NULL otherwise and rte_errno is set.
5356 static struct tcf_vtep
*
5357 flow_tcf_vtep_acquire(struct mlx5_flow_tcf_context
*tcf
,
5358 unsigned int ifouter
,
5359 struct mlx5_flow
*dev_flow
,
5360 struct rte_flow_error
*error
)
5362 struct tcf_vtep
*vtep
= NULL
;
5364 assert(dev_flow
->tcf
.tunnel
);
5365 pthread_mutex_lock(&vtep_list_mutex
);
5366 switch (dev_flow
->tcf
.tunnel
->type
) {
5367 case FLOW_TCF_TUNACT_VXLAN_ENCAP
:
5368 vtep
= flow_tcf_encap_vtep_acquire(tcf
, ifouter
,
5371 case FLOW_TCF_TUNACT_VXLAN_DECAP
:
5372 vtep
= flow_tcf_decap_vtep_acquire(tcf
, dev_flow
, error
);
5375 rte_flow_error_set(error
, ENOTSUP
,
5376 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
5377 "unsupported tunnel type");
5380 pthread_mutex_unlock(&vtep_list_mutex
);
5385 * Release tunneling interface by ifindex. Decrements reference
5386 * counter and actually removes the device if counter is zero.
5389 * Context object initialized by mlx5_flow_tcf_context_create().
5391 * VTEP device descriptor structure.
5392 * @param[in] dev_flow
5393 * Flow tcf object with tunnel structure pointer set.
5396 flow_tcf_vtep_release(struct mlx5_flow_tcf_context
*tcf
,
5397 struct tcf_vtep
*vtep
,
5398 struct mlx5_flow
*dev_flow
)
5400 assert(dev_flow
->tcf
.tunnel
);
5401 pthread_mutex_lock(&vtep_list_mutex
);
5402 switch (dev_flow
->tcf
.tunnel
->type
) {
5403 case FLOW_TCF_TUNACT_VXLAN_DECAP
:
5405 case FLOW_TCF_TUNACT_VXLAN_ENCAP
: {
5406 struct tcf_irule
*iface
;
5408 /* Remove the encap ancillary rules first. */
5409 iface
= dev_flow
->tcf
.vxlan_encap
->iface
;
5411 flow_tcf_encap_neigh(tcf
, iface
, dev_flow
, false, NULL
);
5412 flow_tcf_encap_local(tcf
, iface
, dev_flow
, false, NULL
);
5413 flow_tcf_encap_irule_release(iface
);
5414 dev_flow
->tcf
.vxlan_encap
->iface
= NULL
;
5419 DRV_LOG(WARNING
, "Unsupported tunnel type");
5422 assert(vtep
->refcnt
);
5423 if (--vtep
->refcnt
== 0) {
5424 LIST_REMOVE(vtep
, next
);
5425 flow_tcf_vtep_delete(tcf
, vtep
);
5427 pthread_mutex_unlock(&vtep_list_mutex
);
5430 struct tcf_nlcb_query
{
5433 uint32_t flags_valid
:1;
5437 * Collect queried rule attributes. This is callback routine called by
5438 * libmnl mnl_cb_run() in loop for every message in received packet.
5439 * Current implementation collects the flower flags only.
5442 * Pointer to reply header.
5443 * @param[in, out] arg
5444 * Context pointer for this callback.
5447 * A positive, nonzero value on success (required by libmnl
5448 * to continue messages processing).
5451 flow_tcf_collect_query_cb(const struct nlmsghdr
*nlh
, void *arg
)
5453 struct tcf_nlcb_query
*query
= arg
;
5454 struct tcmsg
*tcm
= mnl_nlmsg_get_payload(nlh
);
5455 struct nlattr
*na
, *na_opt
;
5456 bool flower
= false;
5458 if (nlh
->nlmsg_type
!= RTM_NEWTFILTER
||
5459 tcm
->tcm_handle
!= query
->handle
)
5461 mnl_attr_for_each(na
, nlh
, sizeof(*tcm
)) {
5462 switch (mnl_attr_get_type(na
)) {
5464 if (strcmp(mnl_attr_get_payload(na
), "flower")) {
5465 /* Not flower filter, drop entire message. */
5472 /* Not flower options, drop entire message. */
5475 /* Check nested flower options. */
5476 mnl_attr_for_each_nested(na_opt
, na
) {
5477 switch (mnl_attr_get_type(na_opt
)) {
5478 case TCA_FLOWER_FLAGS
:
5479 query
->flags_valid
= 1;
5481 mnl_attr_get_u32(na_opt
);
5492 * Query a TC flower rule flags via netlink.
5495 * Context object initialized by mlx5_flow_tcf_context_create().
5496 * @param[in] dev_flow
5497 * Pointer to the flow.
5498 * @param[out] pflags
5499 * pointer to the data retrieved by the query.
5502 * 0 on success, a negative errno value otherwise.
5505 flow_tcf_query_flags(struct mlx5_flow_tcf_context
*tcf
,
5506 struct mlx5_flow
*dev_flow
,
5509 struct nlmsghdr
*nlh
;
5511 struct tcf_nlcb_query query
= {
5512 .handle
= dev_flow
->tcf
.tcm
->tcm_handle
,
5515 nlh
= mnl_nlmsg_put_header(tcf
->buf
);
5516 nlh
->nlmsg_type
= RTM_GETTFILTER
;
5517 nlh
->nlmsg_flags
= NLM_F_REQUEST
;
5518 tcm
= mnl_nlmsg_put_extra_header(nlh
, sizeof(*tcm
));
5519 memcpy(tcm
, dev_flow
->tcf
.tcm
, sizeof(*tcm
));
5521 * Ignore Netlink error for filter query operations.
5522 * The reply length is sent by kernel as errno.
5523 * Just check we got the flags option.
5525 flow_tcf_nl_ack(tcf
, nlh
, flow_tcf_collect_query_cb
, &query
);
5526 if (!query
.flags_valid
) {
5530 *pflags
= query
.tc_flags
;
5535 * Query and check the in_hw set for specified rule.
5538 * Context object initialized by mlx5_flow_tcf_context_create().
5539 * @param[in] dev_flow
5540 * Pointer to the flow to check.
5543 * 0 on success, a negative errno value otherwise.
5546 flow_tcf_check_inhw(struct mlx5_flow_tcf_context
*tcf
,
5547 struct mlx5_flow
*dev_flow
)
5552 ret
= flow_tcf_query_flags(tcf
, dev_flow
, &flags
);
5555 return (flags
& TCA_CLS_FLAGS_IN_HW
) ? 0 : -ENOENT
;
5559 * Remove flow from E-Switch by sending Netlink message.
5562 * Pointer to Ethernet device.
5563 * @param[in, out] flow
5564 * Pointer to the sub flow.
5567 flow_tcf_remove(struct rte_eth_dev
*dev
, struct rte_flow
*flow
)
5569 struct mlx5_priv
*priv
= dev
->data
->dev_private
;
5570 struct mlx5_flow_tcf_context
*ctx
= priv
->tcf_context
;
5571 struct mlx5_flow
*dev_flow
;
5572 struct nlmsghdr
*nlh
;
5577 dev_flow
= LIST_FIRST(&flow
->dev_flows
);
5580 /* E-Switch flow can't be expanded. */
5581 assert(!LIST_NEXT(dev_flow
, next
));
5582 if (dev_flow
->tcf
.applied
) {
5583 nlh
= dev_flow
->tcf
.nlh
;
5584 nlh
->nlmsg_type
= RTM_DELTFILTER
;
5585 nlh
->nlmsg_flags
= NLM_F_REQUEST
;
5586 flow_tcf_nl_ack(ctx
, nlh
, NULL
, NULL
);
5587 if (dev_flow
->tcf
.tunnel
) {
5588 assert(dev_flow
->tcf
.tunnel
->vtep
);
5589 flow_tcf_vtep_release(ctx
,
5590 dev_flow
->tcf
.tunnel
->vtep
,
5592 dev_flow
->tcf
.tunnel
->vtep
= NULL
;
5594 /* Cleanup the rule handle value. */
5595 tcm
= mnl_nlmsg_get_payload(nlh
);
5596 tcm
->tcm_handle
= 0;
5597 dev_flow
->tcf
.applied
= 0;
5602 * Fetch the applied rule handle. This is callback routine called by
5603 * libmnl mnl_cb_run() in loop for every message in received packet.
5604 * When the NLM_F_ECHO flag is specified the kernel sends the created
5605 * rule descriptor back to the application and we can retrieve the
5606 * actual rule handle from updated descriptor.
5609 * Pointer to reply header.
5610 * @param[in, out] arg
5611 * Context pointer for this callback.
5614 * A positive, nonzero value on success (required by libmnl
5615 * to continue messages processing).
5618 flow_tcf_collect_apply_cb(const struct nlmsghdr
*nlh
, void *arg
)
5620 struct nlmsghdr
*nlhrq
= arg
;
5621 struct tcmsg
*tcmrq
= mnl_nlmsg_get_payload(nlhrq
);
5622 struct tcmsg
*tcm
= mnl_nlmsg_get_payload(nlh
);
5625 if (nlh
->nlmsg_type
!= RTM_NEWTFILTER
||
5626 nlh
->nlmsg_seq
!= nlhrq
->nlmsg_seq
)
5628 mnl_attr_for_each(na
, nlh
, sizeof(*tcm
)) {
5629 switch (mnl_attr_get_type(na
)) {
5631 if (strcmp(mnl_attr_get_payload(na
), "flower")) {
5632 /* Not flower filter, drop entire message. */
5635 tcmrq
->tcm_handle
= tcm
->tcm_handle
;
5642 * Apply flow to E-Switch by sending Netlink message.
5645 * Pointer to Ethernet device.
5646 * @param[in, out] flow
5647 * Pointer to the sub flow.
5649 * Pointer to the error structure.
5652 * 0 on success, a negative errno value otherwise and rte_errno is set.
5655 flow_tcf_apply(struct rte_eth_dev
*dev
, struct rte_flow
*flow
,
5656 struct rte_flow_error
*error
)
5658 struct mlx5_priv
*priv
= dev
->data
->dev_private
;
5659 struct mlx5_flow_tcf_context
*ctx
= priv
->tcf_context
;
5660 struct mlx5_flow
*dev_flow
;
5661 struct nlmsghdr
*nlh
;
5667 dev_flow
= LIST_FIRST(&flow
->dev_flows
);
5668 /* E-Switch flow can't be expanded. */
5669 assert(!LIST_NEXT(dev_flow
, next
));
5670 if (dev_flow
->tcf
.applied
)
5672 nlh
= dev_flow
->tcf
.nlh
;
5673 nlh
->nlmsg_type
= RTM_NEWTFILTER
;
5674 nlh
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_CREATE
|
5675 NLM_F_EXCL
| NLM_F_ECHO
;
5676 tcm
= mnl_nlmsg_get_payload(nlh
);
5677 /* Allow kernel to assign handle on its own. */
5678 tcm
->tcm_handle
= 0;
5679 if (dev_flow
->tcf
.tunnel
) {
5681 * Replace the interface index, target for
5682 * encapsulation, source for decapsulation.
5684 assert(!dev_flow
->tcf
.tunnel
->vtep
);
5685 assert(dev_flow
->tcf
.tunnel
->ifindex_ptr
);
5686 /* Acquire actual VTEP device when rule is being applied. */
5687 dev_flow
->tcf
.tunnel
->vtep
=
5688 flow_tcf_vtep_acquire(ctx
,
5689 dev_flow
->tcf
.tunnel
->ifindex_org
,
5691 if (!dev_flow
->tcf
.tunnel
->vtep
)
5693 DRV_LOG(INFO
, "Replace ifindex: %d->%d",
5694 dev_flow
->tcf
.tunnel
->vtep
->ifindex
,
5695 dev_flow
->tcf
.tunnel
->ifindex_org
);
5696 *dev_flow
->tcf
.tunnel
->ifindex_ptr
=
5697 dev_flow
->tcf
.tunnel
->vtep
->ifindex
;
5698 if (dev_flow
->tcf
.tunnel
->vtep
->waitreg
) {
5699 /* Clear wait flag for VXLAN port registration. */
5700 dev_flow
->tcf
.tunnel
->vtep
->waitreg
= 0;
5701 twait
= rte_get_timer_hz();
5702 assert(twait
> MS_PER_S
);
5703 twait
= twait
* MLX5_VXLAN_WAIT_PORT_REG_MS
;
5704 twait
= twait
/ MS_PER_S
;
5705 start
= rte_get_timer_cycles();
5709 * Kernel creates the VXLAN devices and registers UDP ports to
5710 * be hardware offloaded within the NIC kernel drivers. The
5711 * registration process is being performed into context of
5712 * working kernel thread and the race conditions might happen.
5713 * The VXLAN device is created and success is returned to
5714 * calling application, but the UDP port registration process
5715 * is not completed yet. The next applied rule may be rejected
5716 * by the driver with ENOSUP code. We are going to wait a bit,
5717 * allowing registration process to be completed. The waiting
5718 * is performed once after device been created.
5721 struct timespec onems
;
5723 ret
= flow_tcf_nl_ack(ctx
, nlh
,
5724 flow_tcf_collect_apply_cb
, nlh
);
5725 if (!ret
|| ret
!= -ENOTSUP
|| !twait
)
5727 /* Wait one millisecond and try again till timeout. */
5729 onems
.tv_nsec
= NS_PER_S
/ MS_PER_S
;
5730 nanosleep(&onems
, 0);
5731 if ((rte_get_timer_cycles() - start
) > twait
) {
5732 /* Timeout elapsed, try once more and exit. */
5737 if (!tcm
->tcm_handle
) {
5738 flow_tcf_remove(dev
, flow
);
5739 return rte_flow_error_set
5741 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
5742 "netlink: rule zero handle returned");
5744 dev_flow
->tcf
.applied
= 1;
5745 if (*dev_flow
->tcf
.ptc_flags
& TCA_CLS_FLAGS_SKIP_SW
)
5748 * Rule was applied without skip_sw flag set.
5749 * We should check whether the rule was acctually
5750 * accepted by hardware (have look at in_hw flag).
5752 if (flow_tcf_check_inhw(ctx
, dev_flow
)) {
5753 flow_tcf_remove(dev
, flow
);
5754 return rte_flow_error_set
5756 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
5757 "netlink: rule has no in_hw flag set");
5761 if (dev_flow
->tcf
.tunnel
) {
5762 /* Rollback the VTEP configuration if rule apply failed. */
5763 assert(dev_flow
->tcf
.tunnel
->vtep
);
5764 flow_tcf_vtep_release(ctx
, dev_flow
->tcf
.tunnel
->vtep
,
5766 dev_flow
->tcf
.tunnel
->vtep
= NULL
;
5768 return rte_flow_error_set(error
, rte_errno
,
5769 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
5770 "netlink: failed to create TC flow rule");
5774 * Remove flow from E-Switch and release resources of the device flow.
5777 * Pointer to Ethernet device.
5778 * @param[in, out] flow
5779 * Pointer to the sub flow.
5782 flow_tcf_destroy(struct rte_eth_dev
*dev
, struct rte_flow
*flow
)
5784 struct mlx5_flow
*dev_flow
;
5788 flow_tcf_remove(dev
, flow
);
5789 if (flow
->counter
) {
5790 if (--flow
->counter
->ref_cnt
== 0) {
5791 rte_free(flow
->counter
);
5792 flow
->counter
= NULL
;
5795 dev_flow
= LIST_FIRST(&flow
->dev_flows
);
5798 /* E-Switch flow can't be expanded. */
5799 assert(!LIST_NEXT(dev_flow
, next
));
5800 LIST_REMOVE(dev_flow
, next
);
5805 * Helper routine for figuring the space size required for a parse buffer.
5808 * array of values to use.
5810 * Current location in array.
5812 * Value to compare with.
5815 * The maximum between the given value and the array value on index.
5818 flow_tcf_arr_val_max(uint16_t array
[], int idx
, uint16_t value
)
5820 return idx
< 0 ? (value
) : RTE_MAX((array
)[idx
], value
);
5824 * Parse rtnetlink message attributes filling the attribute table with the info
5828 * Attribute table to be filled.
5830 * Maxinum entry in the attribute table.
5832 * The attributes section in the message to be parsed.
5834 * The length of the attributes section in the message.
5837 flow_tcf_nl_parse_rtattr(struct rtattr
*tb
[], int max
,
5838 struct rtattr
*rta
, int len
)
5840 unsigned short type
;
5841 memset(tb
, 0, sizeof(struct rtattr
*) * (max
+ 1));
5842 while (RTA_OK(rta
, len
)) {
5843 type
= rta
->rta_type
;
5844 if (type
<= max
&& !tb
[type
])
5846 rta
= RTA_NEXT(rta
, len
);
5851 * Extract flow counters from flower action.
5854 * flower action stats properties in the Netlink message received.
5856 * The backward sequence of rta_types, as written in the attribute table,
5857 * we need to traverse in order to get to the requested object.
5859 * Current location in rta_type table.
5861 * data holding the count statistics of the rte_flow retrieved from
5865 * 0 if data was found and retrieved, -1 otherwise.
/**
 * Extract flow counters from flower action.
 *
 * @param[in] rta
 *   Flower action stats properties in the Netlink message received.
 * @param[in] rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   we need to traverse in order to get to the requested object.
 * @param[in] idx
 *   Current location in rta_type table.
 * @param[out] data
 *   Data holding the count statistics of the rte_flow retrieved from
 *   the message query.
 *
 * @return
 *   0 if data was found and retrieved, -1 otherwise.
 */
static int
flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
				       uint16_t rta_type[], int idx,
				       struct gnet_stats_basic *data)
{
	int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
						 TCA_STATS_BASIC);
	struct rtattr *tbs[tca_stats_max + 1];

	if (rta == NULL || idx < 0)
		return -1;
	flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
				 RTA_DATA(rta), RTA_PAYLOAD(rta));
	switch (rta_type[idx]) {
	case TCA_STATS_BASIC:
		if (tbs[TCA_STATS_BASIC]) {
			/* Bound the copy by the attribute payload size. */
			memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
			       RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
				       sizeof(*data)));
			return 0;
		}
		break;
	default:
		break;
	}
	return -1;
}
5896 * Parse flower single action retrieving the requested action attribute,
5900 * flower action properties in the Netlink message received.
5902 * The backward sequence of rta_types, as written in the attribute table,
5903 * we need to traverse in order to get to the requested object.
5905 * Current location in rta_type table.
5907 * Count statistics retrieved from the message query.
5910 * 0 if data was found and retrieved, -1 otherwise.
5913 flow_tcf_nl_parse_one_action_and_get(struct rtattr
*arg
,
5914 uint16_t rta_type
[], int idx
, void *data
)
5916 int tca_act_max
= flow_tcf_arr_val_max(rta_type
, idx
, TCA_ACT_STATS
);
5917 struct rtattr
*tb
[tca_act_max
+ 1];
5919 if (arg
== NULL
|| idx
< 0)
5921 flow_tcf_nl_parse_rtattr(tb
, tca_act_max
,
5922 RTA_DATA(arg
), RTA_PAYLOAD(arg
));
5923 if (tb
[TCA_ACT_KIND
] == NULL
)
5925 switch (rta_type
[idx
]) {
5927 if (tb
[TCA_ACT_STATS
])
5928 return flow_tcf_nl_action_stats_parse_and_get
5931 (struct gnet_stats_basic
*)data
);
5940 * Parse flower action section in the message retrieving the requested
5941 * attribute from the first action that provides it.
5944 * flower section in the Netlink message received.
5946 * The backward sequence of rta_types, as written in the attribute table,
5947 * we need to traverse in order to get to the requested object.
5949 * Current location in rta_type table.
5951 * data retrieved from the message query.
5954 * 0 if data was found and retrieved, -1 otherwise.
5957 flow_tcf_nl_action_parse_and_get(struct rtattr
*arg
,
5958 uint16_t rta_type
[], int idx
, void *data
)
5960 struct rtattr
*tb
[TCA_ACT_MAX_PRIO
+ 1];
5963 if (arg
== NULL
|| idx
< 0)
5965 flow_tcf_nl_parse_rtattr(tb
, TCA_ACT_MAX_PRIO
,
5966 RTA_DATA(arg
), RTA_PAYLOAD(arg
));
5967 switch (rta_type
[idx
]) {
5969 * flow counters are stored in the actions defined by the flow
5970 * and not in the flow itself, therefore we need to traverse the
5971 * flower chain of actions in search for them.
5973 * Note that the index is not decremented here.
5976 for (i
= 0; i
<= TCA_ACT_MAX_PRIO
; i
++) {
5978 !flow_tcf_nl_parse_one_action_and_get(tb
[i
],
5991 * Parse flower classifier options in the message, retrieving the requested
5992 * attribute if found.
5995 * flower section in the Netlink message received.
5997 * The backward sequence of rta_types, as written in the attribute table,
5998 * we need to traverse in order to get to the requested object.
6000 * Current location in rta_type table.
6002 * data retrieved from the message query.
6005 * 0 if data was found and retrieved, -1 otherwise.
/**
 * Parse flower classifier options in the message, retrieving the requested
 * attribute if found.
 *
 * @param[in] opt
 *   Flower section in the Netlink message received.
 * @param[in] rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   we need to traverse in order to get to the requested object.
 * @param[in] idx
 *   Current location in rta_type table.
 * @param[out] data
 *   Data retrieved from the message query.
 *
 * @return
 *   0 if data was found and retrieved, -1 otherwise.
 */
static int
flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
			       uint16_t rta_type[], int idx, void *data)
{
	int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
						  TCA_FLOWER_ACT);
	struct rtattr *tb[tca_flower_max + 1];

	if (!opt || idx < 0)
		return -1;
	flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
				 RTA_DATA(opt), RTA_PAYLOAD(opt));
	switch (rta_type[idx]) {
	case TCA_FLOWER_ACT:
		/* Descend into the action list, consuming one index level. */
		if (tb[TCA_FLOWER_ACT])
			return flow_tcf_nl_action_parse_and_get
							(tb[TCA_FLOWER_ACT],
							 rta_type, --idx, data);
		break;
	default:
		break;
	}
	return -1;
}
6033 * Parse Netlink reply on filter query, retrieving the flow counters.
6036 * Message received from Netlink.
6038 * The backward sequence of rta_types, as written in the attribute table,
6039 * we need to traverse in order to get to the requested object.
6041 * Current location in rta_type table.
6043 * data retrieved from the message query.
6046 * 0 if data was found and retrieved, -1 otherwise.
/**
 * Parse Netlink reply on filter query, retrieving the flow counters.
 *
 * @param[in] cnlh
 *   Message received from Netlink.
 * @param[in] rta_type
 *   The backward sequence of rta_types, as written in the attribute table,
 *   we need to traverse in order to get to the requested object.
 * @param[in] idx
 *   Current location in rta_type table.
 * @param[out] data
 *   Data retrieved from the message query.
 *
 * @return
 *   0 if data was found and retrieved, -1 otherwise.
 */
static int
flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
				 uint16_t rta_type[], int idx, void *data)
{
	struct nlmsghdr *nlh = cnlh;
	struct tcmsg *t = NLMSG_DATA(nlh);
	int len = nlh->nlmsg_len;
	int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
	struct rtattr *tb[tca_max + 1];

	if (idx < 0)
		return -1;
	/* Only TC filter replies carry the data we are after. */
	if (nlh->nlmsg_type != RTM_NEWTFILTER &&
	    nlh->nlmsg_type != RTM_GETTFILTER &&
	    nlh->nlmsg_type != RTM_DELTFILTER)
		return -1;
	len -= NLMSG_LENGTH(sizeof(*t));
	if (len < 0)
		return -1;
	flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
	/* Not a TC flower flow - bail out */
	if (!tb[TCA_KIND] ||
	    strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
		return -1;
	switch (rta_type[idx]) {
	case TCA_OPTIONS:
		if (tb[TCA_OPTIONS])
			return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
							      rta_type,
							      --idx, data);
		break;
	default:
		break;
	}
	return -1;
}
6086 * A callback to parse Netlink reply on TC flower query.
6089 * Message received from Netlink.
6091 * Pointer to data area to be filled by the parsing routine.
6092 * assumed to be a pointer to struct flow_tcf_stats_basic.
6098 flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr
*nlh
, void *data
)
6101 * The backward sequence of rta_types to pass in order to get
6104 uint16_t rta_type
[] = { TCA_STATS_BASIC
, TCA_ACT_STATS
,
6105 TCA_FLOWER_ACT
, TCA_OPTIONS
};
6106 struct flow_tcf_stats_basic
*sb_data
= data
;
6108 const struct nlmsghdr
*c
;
6109 struct nlmsghdr
*nc
;
6110 } tnlh
= { .c
= nlh
};
6112 if (!flow_tcf_nl_filter_parse_and_get(tnlh
.nc
, rta_type
,
6113 RTE_DIM(rta_type
) - 1,
6114 (void *)&sb_data
->counters
))
6115 sb_data
->valid
= true;
6120 * Query a TC flower rule for its statistics via netlink.
6123 * Pointer to Ethernet device.
6125 * Pointer to the sub flow.
6127 * data retrieved by the query.
6129 * Perform verbose error reporting if not NULL.
6132 * 0 on success, a negative errno value otherwise and rte_errno is set.
6135 flow_tcf_query_count(struct rte_eth_dev
*dev
,
6136 struct rte_flow
*flow
,
6138 struct rte_flow_error
*error
)
6140 struct flow_tcf_stats_basic sb_data
;
6141 struct rte_flow_query_count
*qc
= data
;
6142 struct mlx5_priv
*priv
= dev
->data
->dev_private
;
6143 struct mlx5_flow_tcf_context
*ctx
= priv
->tcf_context
;
6144 struct mnl_socket
*nl
= ctx
->nl
;
6145 struct mlx5_flow
*dev_flow
;
6146 struct nlmsghdr
*nlh
;
6147 uint32_t seq
= priv
->tcf_context
->seq
++;
6151 memset(&sb_data
, 0, sizeof(sb_data
));
6152 dev_flow
= LIST_FIRST(&flow
->dev_flows
);
6153 /* E-Switch flow can't be expanded. */
6154 assert(!LIST_NEXT(dev_flow
, next
));
6155 if (!dev_flow
->flow
->counter
)
6157 nlh
= dev_flow
->tcf
.nlh
;
6158 nlh
->nlmsg_type
= RTM_GETTFILTER
;
6159 nlh
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ECHO
;
6160 nlh
->nlmsg_seq
= seq
;
6161 if (mnl_socket_sendto(nl
, nlh
, nlh
->nlmsg_len
) == -1)
6164 ret
= mnl_socket_recvfrom(nl
, ctx
->buf
, ctx
->buf_size
);
6167 ret
= mnl_cb_run(ctx
->buf
, ret
, seq
,
6168 mnl_socket_get_portid(nl
),
6169 flow_tcf_nl_message_get_stats_basic
,
6172 /* Return the delta from last reset. */
6173 if (sb_data
.valid
) {
6174 /* Return the delta from last reset. */
6177 qc
->hits
= sb_data
.counters
.packets
- flow
->counter
->hits
;
6178 qc
->bytes
= sb_data
.counters
.bytes
- flow
->counter
->bytes
;
6180 flow
->counter
->hits
= sb_data
.counters
.packets
;
6181 flow
->counter
->bytes
= sb_data
.counters
.bytes
;
6185 return rte_flow_error_set(error
, EINVAL
,
6186 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
,
6188 "flow does not have counter");
6190 return rte_flow_error_set
6191 (error
, errno
, RTE_FLOW_ERROR_TYPE_UNSPECIFIED
,
6192 NULL
, "netlink: failed to read flow rule counters");
6194 return rte_flow_error_set
6195 (error
, ENOTSUP
, RTE_FLOW_ERROR_TYPE_UNSPECIFIED
,
6196 NULL
, "counters are not available.");
6202 * @see rte_flow_query()
6206 flow_tcf_query(struct rte_eth_dev
*dev
,
6207 struct rte_flow
*flow
,
6208 const struct rte_flow_action
*actions
,
6210 struct rte_flow_error
*error
)
6214 for (; actions
->type
!= RTE_FLOW_ACTION_TYPE_END
; actions
++) {
6215 switch (actions
->type
) {
6216 case RTE_FLOW_ACTION_TYPE_VOID
:
6218 case RTE_FLOW_ACTION_TYPE_COUNT
:
6219 ret
= flow_tcf_query_count(dev
, flow
, data
, error
);
6222 return rte_flow_error_set(error
, ENOTSUP
,
6223 RTE_FLOW_ERROR_TYPE_ACTION
,
6225 "action not supported");
6231 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops
= {
6232 .validate
= flow_tcf_validate
,
6233 .prepare
= flow_tcf_prepare
,
6234 .translate
= flow_tcf_translate
,
6235 .apply
= flow_tcf_apply
,
6236 .remove
= flow_tcf_remove
,
6237 .destroy
= flow_tcf_destroy
,
6238 .query
= flow_tcf_query
,
6242 * Create and configure a libmnl socket for Netlink flow rules.
6245 * A valid libmnl socket object pointer on success, NULL otherwise and
6248 static struct mnl_socket
*
6249 flow_tcf_mnl_socket_create(void)
6251 struct mnl_socket
*nl
= mnl_socket_open(NETLINK_ROUTE
);
6254 mnl_socket_setsockopt(nl
, NETLINK_CAP_ACK
, &(int){ 1 },
6256 if (!mnl_socket_bind(nl
, 0, MNL_SOCKET_AUTOPID
))
6261 mnl_socket_close(nl
);
6266 * Destroy a libmnl socket.
6269 * Libmnl socket of the @p NETLINK_ROUTE kind.
/**
 * Destroy a libmnl socket.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind. May be NULL (no-op).
 */
static void
flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
{
	if (nl)
		mnl_socket_close(nl);
}
6279 * Initialize ingress qdisc of a given network interface.
6282 * Pointer to tc-flower context to use.
6284 * Index of network interface to initialize.
6286 * Perform verbose error reporting if not NULL.
6289 * 0 on success, a negative errno value otherwise and rte_errno is set.
6292 mlx5_flow_tcf_init(struct mlx5_flow_tcf_context
*ctx
,
6293 unsigned int ifindex
, struct rte_flow_error
*error
)
6295 struct nlmsghdr
*nlh
;
6297 alignas(struct nlmsghdr
)
6298 uint8_t buf
[mnl_nlmsg_size(sizeof(*tcm
)) +
6299 SZ_NLATTR_STRZ_OF("ingress") +
6300 MNL_BUF_EXTRA_SPACE
];
6302 /* Destroy existing ingress qdisc and everything attached to it. */
6303 nlh
= mnl_nlmsg_put_header(buf
);
6304 nlh
->nlmsg_type
= RTM_DELQDISC
;
6305 nlh
->nlmsg_flags
= NLM_F_REQUEST
;
6306 tcm
= mnl_nlmsg_put_extra_header(nlh
, sizeof(*tcm
));
6307 tcm
->tcm_family
= AF_UNSPEC
;
6308 tcm
->tcm_ifindex
= ifindex
;
6309 tcm
->tcm_handle
= TC_H_MAKE(TC_H_INGRESS
, 0);
6310 tcm
->tcm_parent
= TC_H_INGRESS
;
6311 assert(sizeof(buf
) >= nlh
->nlmsg_len
);
6312 /* Ignore errors when qdisc is already absent. */
6313 if (flow_tcf_nl_ack(ctx
, nlh
, NULL
, NULL
) &&
6314 rte_errno
!= EINVAL
&& rte_errno
!= ENOENT
)
6315 return rte_flow_error_set(error
, rte_errno
,
6316 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
6317 "netlink: failed to remove ingress"
6319 /* Create fresh ingress qdisc. */
6320 nlh
= mnl_nlmsg_put_header(buf
);
6321 nlh
->nlmsg_type
= RTM_NEWQDISC
;
6322 nlh
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
6323 tcm
= mnl_nlmsg_put_extra_header(nlh
, sizeof(*tcm
));
6324 tcm
->tcm_family
= AF_UNSPEC
;
6325 tcm
->tcm_ifindex
= ifindex
;
6326 tcm
->tcm_handle
= TC_H_MAKE(TC_H_INGRESS
, 0);
6327 tcm
->tcm_parent
= TC_H_INGRESS
;
6328 mnl_attr_put_strz_check(nlh
, sizeof(buf
), TCA_KIND
, "ingress");
6329 assert(sizeof(buf
) >= nlh
->nlmsg_len
);
6330 if (flow_tcf_nl_ack(ctx
, nlh
, NULL
, NULL
))
6331 return rte_flow_error_set(error
, rte_errno
,
6332 RTE_FLOW_ERROR_TYPE_UNSPECIFIED
, NULL
,
6333 "netlink: failed to create ingress"
6339 * Create libmnl context for Netlink flow rules.
6342 * A valid libmnl socket object pointer on success, NULL otherwise and
6345 struct mlx5_flow_tcf_context
*
6346 mlx5_flow_tcf_context_create(void)
6348 struct mlx5_flow_tcf_context
*ctx
= rte_zmalloc(__func__
,
6353 ctx
->nl
= flow_tcf_mnl_socket_create();
6356 ctx
->buf_size
= MNL_SOCKET_BUFFER_SIZE
;
6357 ctx
->buf
= rte_zmalloc(__func__
,
6358 ctx
->buf_size
, sizeof(uint32_t));
6361 ctx
->seq
= random();
6364 mlx5_flow_tcf_context_destroy(ctx
);
6369 * Destroy a libmnl context.
6372 * Libmnl socket of the @p NETLINK_ROUTE kind.
6375 mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context
*ctx
)
6379 flow_tcf_mnl_socket_destroy(ctx
->nl
);