2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <net/flow_dissector.h>
34 #include <net/sch_generic.h>
35 #include <net/pkt_cls.h>
36 #include <net/tc_act/tc_gact.h>
37 #include <net/tc_act/tc_skbedit.h>
38 #include <linux/mlx5/fs.h>
39 #include <linux/mlx5/device.h>
40 #include <linux/rhashtable.h>
41 #include <net/switchdev.h>
42 #include <net/tc_act/tc_mirred.h>
43 #include <net/tc_act/tc_vlan.h>
44 #include <net/tc_act/tc_tunnel_key.h>
45 #include <net/tc_act/tc_pedit.h>
46 #include <net/vxlan.h>
/* NOTE(review): mangled extraction — struct body lines are missing and the
 * enum header for the flow-flag constants is not visible. The two BIT()
 * values distinguish rules offloaded to the eswitch FDB vs. the NIC RX
 * table (see mlx5e_tc_del_flow below, which tests MLX5E_TC_FLOW_ESWITCH).
 */
52 struct mlx5_nic_flow_attr
{
59 MLX5E_TC_FLOW_ESWITCH
= BIT(0),
60 MLX5E_TC_FLOW_NIC
= BIT(1),
/* Per-offloaded-filter state, keyed into a rhashtable via 'node'.
 * NOTE(review): several members are missing from this extraction. The two
 * trailing zero-length arrays overlay the same storage: exactly one of
 * esw_attr/nic_attr is used per flow, selected by the flow's flags —
 * presumably allocated with extra space for whichever attr applies; confirm
 * against the allocation site (not visible here).
 */
63 struct mlx5e_tc_flow
{
64 struct rhash_head node
;
67 struct mlx5_flow_handle
*rule
;
68 struct list_head encap
; /* flows sharing the same encap */
70 struct mlx5_esw_flow_attr esw_attr
[0];
71 struct mlx5_nic_flow_attr nic_attr
[0];
/* Temporary parse results carried from filter parsing to rule creation:
 * the flow_spec match plus the packed header-rewrite (pedit) action buffer.
 * mod_hdr_actions is kcalloc'ed in alloc_mod_hdr_actions() and freed after
 * the firmware modify-header object is allocated.
 * NOTE(review): the enum wrapper for the MLX5_HEADER_TYPE_* tunnel-header
 * constants is missing from this extraction.
 */
75 struct mlx5e_tc_flow_parse_attr
{
76 struct mlx5_flow_spec spec
;
77 int num_mod_hdr_actions
;
78 void *mod_hdr_actions
;
82 MLX5_HEADER_TYPE_VXLAN
= 0x0,
83 MLX5_HEADER_TYPE_NVGRE
= 0x1,
/* Sizing for the auto-grouped NIC TC offload flow table created lazily in
 * mlx5e_tc_add_nic_flow(). */
86 #define MLX5E_TC_TABLE_NUM_ENTRIES 1024
87 #define MLX5E_TC_TABLE_NUM_GROUPS 4
/* Install a parsed TC flower rule into the NIC RX flow tables.
 * Sets up the destination (forward to the vlan table, or a flow counter),
 * optionally allocates a firmware modify-header (pedit) object, lazily
 * creates the TC offload table on first use, then adds the rule.
 * Returns the rule handle or an ERR_PTR; the error path tears down the
 * modify-header object and counter in reverse order.
 * NOTE(review): mangled extraction — declarations of 'err', several closing
 * braces, IS_ERR checks and return statements are missing from this view.
 */
89 static struct mlx5_flow_handle
*
90 mlx5e_tc_add_nic_flow(struct mlx5e_priv
*priv
,
91 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
92 struct mlx5e_tc_flow
*flow
)
94 struct mlx5_nic_flow_attr
*attr
= flow
->nic_attr
;
95 struct mlx5_core_dev
*dev
= priv
->mdev
;
96 struct mlx5_flow_destination dest
= {};
97 struct mlx5_flow_act flow_act
= {
98 .action
= attr
->action
,
99 .flow_tag
= attr
->flow_tag
,
102 struct mlx5_fc
*counter
= NULL
;
103 struct mlx5_flow_handle
*rule
;
104 bool table_created
= false;
/* FWD_DEST: continue processing in the vlan table; otherwise a bare
 * counter destination is used for count-only (e.g. drop) rules. */
107 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
) {
108 dest
.type
= MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE
;
109 dest
.ft
= priv
->fs
.vlan
.ft
.t
;
110 } else if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_COUNT
) {
111 counter
= mlx5_fc_create(dev
, true);
113 return ERR_CAST(counter
);
115 dest
.type
= MLX5_FLOW_DESTINATION_TYPE_COUNTER
;
116 dest
.counter
= counter
;
/* Translate the parsed pedit actions into a FW modify-header object;
 * the SW action buffer is freed regardless of success. */
119 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
) {
120 err
= mlx5_modify_header_alloc(dev
, MLX5_FLOW_NAMESPACE_KERNEL
,
121 parse_attr
->num_mod_hdr_actions
,
122 parse_attr
->mod_hdr_actions
,
124 flow_act
.modify_id
= attr
->mod_hdr_id
;
125 kfree(parse_attr
->mod_hdr_actions
);
128 goto err_create_mod_hdr_id
;
/* Lazily create the TC offload table on first rule insertion. */
132 if (IS_ERR_OR_NULL(priv
->fs
.tc
.t
)) {
134 mlx5_create_auto_grouped_flow_table(priv
->fs
.ns
,
136 MLX5E_TC_TABLE_NUM_ENTRIES
,
137 MLX5E_TC_TABLE_NUM_GROUPS
,
139 if (IS_ERR(priv
->fs
.tc
.t
)) {
140 netdev_err(priv
->netdev
,
141 "Failed to create tc offload table\n");
142 rule
= ERR_CAST(priv
->fs
.tc
.t
);
146 table_created
= true;
149 parse_attr
->spec
.match_criteria_enable
= MLX5_MATCH_OUTER_HEADERS
;
150 rule
= mlx5_add_flow_rules(priv
->fs
.tc
.t
, &parse_attr
->spec
,
151 &flow_act
, &dest
, 1);
/* Error unwind: drop the table if we just created it, then the
 * modify-header object, then the counter. */
160 mlx5_destroy_flow_table(priv
->fs
.tc
.t
);
161 priv
->fs
.tc
.t
= NULL
;
164 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)
165 mlx5_modify_header_dealloc(priv
->mdev
,
167 err_create_mod_hdr_id
:
168 mlx5_fc_destroy(dev
, counter
);
/* Tear down a NIC-offloaded flow: delete the rule, free its counter,
 * destroy the TC table when the last filter is removed, and release the
 * modify-header object if one was allocated.
 * NOTE(review): mangled extraction — braces/blank lines missing.
 */
173 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv
*priv
,
174 struct mlx5e_tc_flow
*flow
)
176 struct mlx5_fc
*counter
= NULL
;
/* Counter must be fetched before the rule is deleted. */
178 counter
= mlx5_flow_rule_counter(flow
->rule
);
179 mlx5_del_flow_rules(flow
->rule
);
180 mlx5_fc_destroy(priv
->mdev
, counter
);
/* Last filter gone: release the lazily-created TC offload table. */
182 if (!mlx5e_tc_num_filters(priv
) && (priv
->fs
.tc
.t
)) {
183 mlx5_destroy_flow_table(priv
->fs
.tc
.t
);
184 priv
->fs
.tc
.t
= NULL
;
187 if (flow
->nic_attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)
188 mlx5_modify_header_dealloc(priv
->mdev
,
189 flow
->nic_attr
->mod_hdr_id
);
/* Forward declaration: mlx5e_detach_encap is defined below but needed by
 * the FDB add/del error paths. */
192 static void mlx5e_detach_encap(struct mlx5e_priv
*priv
,
193 struct mlx5e_tc_flow
*flow
);
/* Install a parsed TC flower rule into the eswitch FDB (switchdev mode):
 * push/pop vlan action first, then an optional FDB-namespace modify-header
 * object, then the offloaded rule itself. Error unwind releases the
 * modify-header, vlan action and any attached encap entry in reverse order.
 * NOTE(review): mangled extraction — 'err' declaration, error checks and
 * return statements are missing from this view.
 */
195 static struct mlx5_flow_handle
*
196 mlx5e_tc_add_fdb_flow(struct mlx5e_priv
*priv
,
197 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
198 struct mlx5e_tc_flow
*flow
)
200 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
201 struct mlx5_esw_flow_attr
*attr
= flow
->esw_attr
;
202 struct mlx5_flow_handle
*rule
;
205 err
= mlx5_eswitch_add_vlan_action(esw
, attr
);
/* Translate pedit actions into an FDB modify-header object; the SW
 * action buffer is freed afterwards. */
211 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
) {
212 err
= mlx5_modify_header_alloc(priv
->mdev
, MLX5_FLOW_NAMESPACE_FDB
,
213 parse_attr
->num_mod_hdr_actions
,
214 parse_attr
->mod_hdr_actions
,
216 kfree(parse_attr
->mod_hdr_actions
);
223 rule
= mlx5_eswitch_add_offloaded_rule(esw
, &parse_attr
->spec
, attr
);
/* Error unwind in reverse order of setup. */
230 if (flow
->esw_attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)
231 mlx5_modify_header_dealloc(priv
->mdev
,
234 mlx5_eswitch_del_vlan_action(esw
, attr
);
236 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_ENCAP
)
237 mlx5e_detach_encap(priv
, flow
);
/* Tear down an eswitch-offloaded flow in reverse order of
 * mlx5e_tc_add_fdb_flow: rule, vlan action, encap entry, modify-header.
 * NOTE(review): mangled extraction — the mod_hdr_id argument line of the
 * final dealloc call is missing from this view.
 */
241 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv
*priv
,
242 struct mlx5e_tc_flow
*flow
)
244 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
245 struct mlx5_esw_flow_attr
*attr
= flow
->esw_attr
;
247 mlx5_eswitch_del_offloaded_rule(esw
, flow
->rule
, flow
->esw_attr
);
249 mlx5_eswitch_del_vlan_action(esw
, flow
->esw_attr
);
251 if (flow
->esw_attr
->action
& MLX5_FLOW_CONTEXT_ACTION_ENCAP
)
252 mlx5e_detach_encap(priv
, flow
);
254 if (flow
->esw_attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)
255 mlx5_modify_header_dealloc(priv
->mdev
,
/* Detach this flow from its shared encap entry. If the flow was the last
 * user (the list became empty), free the FW encap object and unhash the
 * entry. 'next' is captured before list_del so the emptiness test and
 * list_entry() still reference the encap entry's 'flows' list head.
 * NOTE(review): mangled extraction — kfree of the entry and closing braces
 * are missing from this view.
 */
259 static void mlx5e_detach_encap(struct mlx5e_priv
*priv
,
260 struct mlx5e_tc_flow
*flow
)
262 struct list_head
*next
= flow
->encap
.next
;
264 list_del(&flow
->encap
);
265 if (list_empty(next
)) {
266 struct mlx5_encap_entry
*e
;
268 e
= list_entry(next
, struct mlx5_encap_entry
, flows
);
270 mlx5_encap_dealloc(priv
->mdev
, e
->encap_id
);
273 hlist_del_rcu(&e
->encap_hlist
);
/* Dispatch flow teardown by offload type: eswitch (FDB) flows vs. NIC
 * flows, per the MLX5E_TC_FLOW_ESWITCH flag.
 * NOTE(review): the 'else' line between the two calls is missing from this
 * mangled extraction.
 */
278 static void mlx5e_tc_del_flow(struct mlx5e_priv
*priv
,
279 struct mlx5e_tc_flow
*flow
)
281 if (flow
->flags
& MLX5E_TC_FLOW_ESWITCH
)
282 mlx5e_tc_del_fdb_flow(priv
, flow
);
284 mlx5e_tc_del_nic_flow(priv
, flow
);
/* Fill the flow spec for a VXLAN tunnel match: force outer ip_protocol to
 * UDP and, when the filter matches on the tunnel key id, set the VNI in
 * the misc parameters (mask and value).
 * NOTE(review): mangled extraction — the MLX5_ADDR_OF field-name arguments
 * (outer_headers / misc_parameters) and target-size arguments are missing.
 */
287 static void parse_vxlan_attr(struct mlx5_flow_spec
*spec
,
288 struct tc_cls_flower_offload
*f
)
290 void *headers_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
292 void *headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
294 void *misc_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
296 void *misc_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
299 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4
, headers_c
, ip_protocol
);
300 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ip_protocol
, IPPROTO_UDP
);
302 if (dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_ENC_KEYID
)) {
303 struct flow_dissector_key_keyid
*key
=
304 skb_flow_dissector_target(f
->dissector
,
305 FLOW_DISSECTOR_KEY_ENC_KEYID
,
307 struct flow_dissector_key_keyid
*mask
=
308 skb_flow_dissector_target(f
->dissector
,
309 FLOW_DISSECTOR_KEY_ENC_KEYID
,
/* VNI is carried big-endian in the dissector key. */
311 MLX5_SET(fte_match_set_misc
, misc_c
, vxlan_vni
,
312 be32_to_cpu(mask
->keyid
));
313 MLX5_SET(fte_match_set_misc
, misc_v
, vxlan_vni
,
314 be32_to_cpu(key
->keyid
));
/* Parse the tunnel (outer-header) portion of a flower filter into the flow
 * spec: requires a fully-masked UDP dst port that is a known offloaded
 * VXLAN port, then matches outer IPv4/IPv6 addresses, enforces a DMAC
 * match (needed by flow counters), and punts IP fragments to software.
 * NOTE(review): mangled extraction — returns, braces and several MLX5_SET
 * value arguments are missing from this view.
 */
318 static int parse_tunnel_attr(struct mlx5e_priv
*priv
,
319 struct mlx5_flow_spec
*spec
,
320 struct tc_cls_flower_offload
*f
)
322 void *headers_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
324 void *headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
327 struct flow_dissector_key_control
*enc_control
=
328 skb_flow_dissector_target(f
->dissector
,
329 FLOW_DISSECTOR_KEY_ENC_CONTROL
,
332 if (dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_ENC_PORTS
)) {
333 struct flow_dissector_key_ports
*key
=
334 skb_flow_dissector_target(f
->dissector
,
335 FLOW_DISSECTOR_KEY_ENC_PORTS
,
337 struct flow_dissector_key_ports
*mask
=
338 skb_flow_dissector_target(f
->dissector
,
339 FLOW_DISSECTOR_KEY_ENC_PORTS
,
/* VXLAN port registration lives on the uplink representor. */
341 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
342 struct net_device
*up_dev
= mlx5_eswitch_get_uplink_netdev(esw
);
343 struct mlx5e_priv
*up_priv
= netdev_priv(up_dev
);
345 /* Full udp dst port must be given */
346 if (memchr_inv(&mask
->dst
, 0xff, sizeof(mask
->dst
)))
347 goto vxlan_match_offload_err
;
349 if (mlx5e_vxlan_lookup_port(up_priv
, be16_to_cpu(key
->dst
)) &&
350 MLX5_CAP_ESW(priv
->mdev
, vxlan_encap_decap
))
351 parse_vxlan_attr(spec
, f
);
353 netdev_warn(priv
->netdev
,
354 "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key
->dst
));
358 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
359 udp_dport
, ntohs(mask
->dst
));
360 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
361 udp_dport
, ntohs(key
->dst
));
363 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
364 udp_sport
, ntohs(mask
->src
));
365 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
366 udp_sport
, ntohs(key
->src
));
367 } else { /* udp dst port must be given */
368 vxlan_match_offload_err
:
369 netdev_warn(priv
->netdev
,
370 "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
/* Outer IPv4 tunnel endpoints. */
374 if (enc_control
->addr_type
== FLOW_DISSECTOR_KEY_IPV4_ADDRS
) {
375 struct flow_dissector_key_ipv4_addrs
*key
=
376 skb_flow_dissector_target(f
->dissector
,
377 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS
,
379 struct flow_dissector_key_ipv4_addrs
*mask
=
380 skb_flow_dissector_target(f
->dissector
,
381 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS
,
383 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
384 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
,
386 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
387 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
,
390 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
391 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
,
393 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
394 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
,
397 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4
, headers_c
, ethertype
);
398 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ethertype
, ETH_P_IP
);
/* Outer IPv6 tunnel endpoints. */
399 } else if (enc_control
->addr_type
== FLOW_DISSECTOR_KEY_IPV6_ADDRS
) {
400 struct flow_dissector_key_ipv6_addrs
*key
=
401 skb_flow_dissector_target(f
->dissector
,
402 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS
,
404 struct flow_dissector_key_ipv6_addrs
*mask
=
405 skb_flow_dissector_target(f
->dissector
,
406 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS
,
409 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
410 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
),
411 &mask
->src
, MLX5_FLD_SZ_BYTES(ipv6_layout
, ipv6
));
412 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
413 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
),
414 &key
->src
, MLX5_FLD_SZ_BYTES(ipv6_layout
, ipv6
));
416 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
417 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
),
418 &mask
->dst
, MLX5_FLD_SZ_BYTES(ipv6_layout
, ipv6
));
419 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
420 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
),
421 &key
->dst
, MLX5_FLD_SZ_BYTES(ipv6_layout
, ipv6
));
423 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4
, headers_c
, ethertype
);
424 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ethertype
, ETH_P_IPV6
);
427 /* Enforce DMAC when offloading incoming tunneled flows.
428 * Flow counters require a match on the DMAC.
430 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4
, headers_c
, dmac_47_16
);
431 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4
, headers_c
, dmac_15_0
);
432 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
433 dmac_47_16
), priv
->netdev
->dev_addr
);
435 /* let software handle IP fragments */
436 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, frag
, 1);
437 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, frag
, 0);
/* Core flower-filter-to-flow-spec translation. Rejects unsupported
 * dissector keys, handles tunnel (decap) matches by delegating to
 * parse_tunnel_attr and re-pointing the header cursors at the inner
 * headers, then translates control/basic/eth/vlan/IPv4/IPv6/ports keys.
 * Also raises *min_inline as needed for matches beyond L2 (fragment bit,
 * IP addresses, L4 ports) so the caller can validate the eswitch inline
 * mode.
 * NOTE(review): mangled extraction — 'min_inline' parameter, 'addr_type'/
 * 'ip_proto' declarations, returns, switch/case bodies and many closing
 * braces are missing from this view.
 */
442 static int __parse_cls_flower(struct mlx5e_priv
*priv
,
443 struct mlx5_flow_spec
*spec
,
444 struct tc_cls_flower_offload
*f
,
447 void *headers_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
449 void *headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
454 *min_inline
= MLX5_INLINE_MODE_L2
;
/* Any key outside this whitelist cannot be offloaded. */
456 if (f
->dissector
->used_keys
&
457 ~(BIT(FLOW_DISSECTOR_KEY_CONTROL
) |
458 BIT(FLOW_DISSECTOR_KEY_BASIC
) |
459 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS
) |
460 BIT(FLOW_DISSECTOR_KEY_VLAN
) |
461 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS
) |
462 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS
) |
463 BIT(FLOW_DISSECTOR_KEY_PORTS
) |
464 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID
) |
465 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS
) |
466 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS
) |
467 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS
) |
468 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL
))) {
469 netdev_warn(priv
->netdev
, "Unsupported key used: 0x%x\n",
470 f
->dissector
->used_keys
);
/* Tunnel match: parse outer headers, then redirect the header
 * cursors to the inner headers for the rest of the function. */
474 if ((dissector_uses_key(f
->dissector
,
475 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS
) ||
476 dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_ENC_KEYID
) ||
477 dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_ENC_PORTS
)) &&
478 dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_ENC_CONTROL
)) {
479 struct flow_dissector_key_control
*key
=
480 skb_flow_dissector_target(f
->dissector
,
481 FLOW_DISSECTOR_KEY_ENC_CONTROL
,
483 switch (key
->addr_type
) {
484 case FLOW_DISSECTOR_KEY_IPV4_ADDRS
:
485 case FLOW_DISSECTOR_KEY_IPV6_ADDRS
:
486 if (parse_tunnel_attr(priv
, spec
, f
))
493 /* In decap flow, header pointers should point to the inner
494 * headers, outer header were already set by parse_tunnel_attr
496 headers_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
498 headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
502 if (dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_CONTROL
)) {
503 struct flow_dissector_key_control
*key
=
504 skb_flow_dissector_target(f
->dissector
,
505 FLOW_DISSECTOR_KEY_CONTROL
,
508 struct flow_dissector_key_control
*mask
=
509 skb_flow_dissector_target(f
->dissector
,
510 FLOW_DISSECTOR_KEY_CONTROL
,
512 addr_type
= key
->addr_type
;
514 if (mask
->flags
& FLOW_DIS_IS_FRAGMENT
) {
515 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, frag
, 1);
516 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, frag
,
517 key
->flags
& FLOW_DIS_IS_FRAGMENT
);
519 /* the HW doesn't need L3 inline to match on frag=no */
520 if (key
->flags
& FLOW_DIS_IS_FRAGMENT
)
521 *min_inline
= MLX5_INLINE_MODE_IP
;
525 if (dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_BASIC
)) {
526 struct flow_dissector_key_basic
*key
=
527 skb_flow_dissector_target(f
->dissector
,
528 FLOW_DISSECTOR_KEY_BASIC
,
530 struct flow_dissector_key_basic
*mask
=
531 skb_flow_dissector_target(f
->dissector
,
532 FLOW_DISSECTOR_KEY_BASIC
,
534 ip_proto
= key
->ip_proto
;
536 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ethertype
,
537 ntohs(mask
->n_proto
));
538 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ethertype
,
539 ntohs(key
->n_proto
));
541 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ip_protocol
,
543 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ip_protocol
,
547 *min_inline
= MLX5_INLINE_MODE_IP
;
550 if (dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_ETH_ADDRS
)) {
551 struct flow_dissector_key_eth_addrs
*key
=
552 skb_flow_dissector_target(f
->dissector
,
553 FLOW_DISSECTOR_KEY_ETH_ADDRS
,
555 struct flow_dissector_key_eth_addrs
*mask
=
556 skb_flow_dissector_target(f
->dissector
,
557 FLOW_DISSECTOR_KEY_ETH_ADDRS
,
/* NOTE(review): dmac/smac field-name and source arguments of these
 * four copies are missing from the extraction. */
560 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
563 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
567 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
570 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
575 if (dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_VLAN
)) {
576 struct flow_dissector_key_vlan
*key
=
577 skb_flow_dissector_target(f
->dissector
,
578 FLOW_DISSECTOR_KEY_VLAN
,
580 struct flow_dissector_key_vlan
*mask
=
581 skb_flow_dissector_target(f
->dissector
,
582 FLOW_DISSECTOR_KEY_VLAN
,
584 if (mask
->vlan_id
|| mask
->vlan_priority
) {
585 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, cvlan_tag
, 1);
586 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, cvlan_tag
, 1);
588 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, first_vid
, mask
->vlan_id
);
589 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, first_vid
, key
->vlan_id
);
591 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, first_prio
, mask
->vlan_priority
);
592 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, first_prio
, key
->vlan_priority
);
596 if (addr_type
== FLOW_DISSECTOR_KEY_IPV4_ADDRS
) {
597 struct flow_dissector_key_ipv4_addrs
*key
=
598 skb_flow_dissector_target(f
->dissector
,
599 FLOW_DISSECTOR_KEY_IPV4_ADDRS
,
601 struct flow_dissector_key_ipv4_addrs
*mask
=
602 skb_flow_dissector_target(f
->dissector
,
603 FLOW_DISSECTOR_KEY_IPV4_ADDRS
,
606 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
607 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
),
608 &mask
->src
, sizeof(mask
->src
));
609 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
610 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
),
611 &key
->src
, sizeof(key
->src
));
612 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
613 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
),
614 &mask
->dst
, sizeof(mask
->dst
));
615 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
616 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
),
617 &key
->dst
, sizeof(key
->dst
));
619 if (mask
->src
|| mask
->dst
)
620 *min_inline
= MLX5_INLINE_MODE_IP
;
623 if (addr_type
== FLOW_DISSECTOR_KEY_IPV6_ADDRS
) {
624 struct flow_dissector_key_ipv6_addrs
*key
=
625 skb_flow_dissector_target(f
->dissector
,
626 FLOW_DISSECTOR_KEY_IPV6_ADDRS
,
628 struct flow_dissector_key_ipv6_addrs
*mask
=
629 skb_flow_dissector_target(f
->dissector
,
630 FLOW_DISSECTOR_KEY_IPV6_ADDRS
,
633 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
634 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
),
635 &mask
->src
, sizeof(mask
->src
));
636 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
637 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
),
638 &key
->src
, sizeof(key
->src
));
640 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
641 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
),
642 &mask
->dst
, sizeof(mask
->dst
));
643 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
644 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
),
645 &key
->dst
, sizeof(key
->dst
));
647 if (ipv6_addr_type(&mask
->src
) != IPV6_ADDR_ANY
||
648 ipv6_addr_type(&mask
->dst
) != IPV6_ADDR_ANY
)
649 *min_inline
= MLX5_INLINE_MODE_IP
;
652 if (dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_PORTS
)) {
653 struct flow_dissector_key_ports
*key
=
654 skb_flow_dissector_target(f
->dissector
,
655 FLOW_DISSECTOR_KEY_PORTS
,
657 struct flow_dissector_key_ports
*mask
=
658 skb_flow_dissector_target(f
->dissector
,
659 FLOW_DISSECTOR_KEY_PORTS
,
/* NOTE(review): the switch (ip_proto) wrapper selecting TCP vs UDP
 * port fields is missing from the extraction. */
663 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
664 tcp_sport
, ntohs(mask
->src
));
665 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
666 tcp_sport
, ntohs(key
->src
));
668 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
669 tcp_dport
, ntohs(mask
->dst
));
670 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
671 tcp_dport
, ntohs(key
->dst
));
675 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
676 udp_sport
, ntohs(mask
->src
));
677 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
678 udp_sport
, ntohs(key
->src
));
680 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
681 udp_dport
, ntohs(mask
->dst
));
682 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
683 udp_dport
, ntohs(key
->dst
));
686 netdev_err(priv
->netdev
,
687 "Only UDP and TCP transport are supported\n");
691 if (mask
->src
|| mask
->dst
)
692 *min_inline
= MLX5_INLINE_MODE_TCP_UDP
;
/* Wrapper around __parse_cls_flower that additionally validates the
 * eswitch minimum-inline requirement for eswitch flows on non-uplink
 * vports: the configured inline mode must cover the deepest header the
 * filter matches on, otherwise the flow is rejected with a warning.
 * NOTE(review): mangled extraction — 'min_inline' declaration, the error
 * return and closing braces are missing from this view.
 */
698 static int parse_cls_flower(struct mlx5e_priv
*priv
,
699 struct mlx5e_tc_flow
*flow
,
700 struct mlx5_flow_spec
*spec
,
701 struct tc_cls_flower_offload
*f
)
703 struct mlx5_core_dev
*dev
= priv
->mdev
;
704 struct mlx5_eswitch
*esw
= dev
->priv
.eswitch
;
705 struct mlx5_eswitch_rep
*rep
= priv
->ppriv
;
709 err
= __parse_cls_flower(priv
, spec
, f
, &min_inline
);
711 if (!err
&& (flow
->flags
& MLX5E_TC_FLOW_ESWITCH
) &&
712 rep
->vport
!= FDB_UPLINK_VPORT
) {
713 if (esw
->offloads
.inline_mode
!= MLX5_INLINE_MODE_NONE
&&
714 esw
->offloads
.inline_mode
< min_inline
) {
715 netdev_warn(priv
->netdev
,
716 "Flow is not offloaded due to min inline setting, required %d actual %d\n",
717 min_inline
, esw
->offloads
.inline_mode
);
/* Scratch layout used while parsing tc pedit actions: one sub-header
 * struct per TCA_PEDIT_KEY_EX_HDR_TYPE_*, addressed generically via the
 * offsets table and the pedit_header() macro below.
 * NOTE(review): the eth/ip4/ip6/tcp/udp member lines of the struct are
 * missing from this mangled extraction.
 */
725 struct pedit_headers
{
733 static int pedit_header_offsets
[] = {
734 [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH
] = offsetof(struct pedit_headers
, eth
),
735 [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4
] = offsetof(struct pedit_headers
, ip4
),
736 [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
] = offsetof(struct pedit_headers
, ip6
),
737 [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP
] = offsetof(struct pedit_headers
, tcp
),
738 [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP
] = offsetof(struct pedit_headers
, udp
),
/* Address of the sub-header of type _htype inside a pedit_headers. */
741 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
/* Record one 32-bit pedit key into the per-command masks/vals scratch
 * structs at (header type, byte offset). Rejects out-of-range header
 * types and overlapping rewrites of the same location.
 * NOTE(review): mangled extraction — error returns, the mask-accumulation
 * line and the final return are missing from this view.
 */
743 static int set_pedit_val(u8 hdr_type
, u32 mask
, u32 val
, u32 offset
,
744 struct pedit_headers
*masks
,
745 struct pedit_headers
*vals
)
747 u32
*curr_pmask
, *curr_pval
;
749 if (hdr_type
>= __PEDIT_HDR_TYPE_MAX
)
752 curr_pmask
= (u32
*)(pedit_header(masks
, hdr_type
) + offset
);
753 curr_pval
= (u32
*)(pedit_header(vals
, hdr_type
) + offset
);
755 if (*curr_pmask
& mask
) /* disallow acting twice on the same location */
759 *curr_pval
|= (val
& mask
);
/* Map from HW modify-header field IDs to {size in bytes, offset of the
 * corresponding SW field inside struct pedit_headers}. Iterated by
 * offload_pedit_fields() to translate accumulated pedit masks into FW
 * set/add actions. The TCP_FLAGS entry addresses the byte that follows
 * ack_seq (offset + 5) — presumably the flags byte of the TCP header;
 * confirm against struct tcphdr layout.
 */
773 static struct mlx5_fields fields
[] = {
774 {MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16
, 4, offsetof(struct pedit_headers
, eth
.h_dest
[0])},
775 {MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0
, 2, offsetof(struct pedit_headers
, eth
.h_dest
[4])},
776 {MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16
, 4, offsetof(struct pedit_headers
, eth
.h_source
[0])},
777 {MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0
, 2, offsetof(struct pedit_headers
, eth
.h_source
[4])},
778 {MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE
, 2, offsetof(struct pedit_headers
, eth
.h_proto
)},
780 {MLX5_ACTION_IN_FIELD_OUT_IP_DSCP
, 1, offsetof(struct pedit_headers
, ip4
.tos
)},
781 {MLX5_ACTION_IN_FIELD_OUT_IP_TTL
, 1, offsetof(struct pedit_headers
, ip4
.ttl
)},
782 {MLX5_ACTION_IN_FIELD_OUT_SIPV4
, 4, offsetof(struct pedit_headers
, ip4
.saddr
)},
783 {MLX5_ACTION_IN_FIELD_OUT_DIPV4
, 4, offsetof(struct pedit_headers
, ip4
.daddr
)},
785 {MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96
, 4, offsetof(struct pedit_headers
, ip6
.saddr
.s6_addr32
[0])},
786 {MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64
, 4, offsetof(struct pedit_headers
, ip6
.saddr
.s6_addr32
[1])},
787 {MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32
, 4, offsetof(struct pedit_headers
, ip6
.saddr
.s6_addr32
[2])},
788 {MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0
, 4, offsetof(struct pedit_headers
, ip6
.saddr
.s6_addr32
[3])},
789 {MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96
, 4, offsetof(struct pedit_headers
, ip6
.daddr
.s6_addr32
[0])},
790 {MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64
, 4, offsetof(struct pedit_headers
, ip6
.daddr
.s6_addr32
[1])},
791 {MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32
, 4, offsetof(struct pedit_headers
, ip6
.daddr
.s6_addr32
[2])},
792 {MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0
, 4, offsetof(struct pedit_headers
, ip6
.daddr
.s6_addr32
[3])},
794 {MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT
, 2, offsetof(struct pedit_headers
, tcp
.source
)},
795 {MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT
, 2, offsetof(struct pedit_headers
, tcp
.dest
)},
796 {MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS
, 1, offsetof(struct pedit_headers
, tcp
.ack_seq
) + 5},
798 {MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT
, 2, offsetof(struct pedit_headers
, udp
.source
)},
799 {MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT
, 2, offsetof(struct pedit_headers
, udp
.dest
)},
/* Walk the fields[] table and, for every HW field touched by the
 * accumulated set/add pedit masks, emit one FW set_action_in entry into
 * parse_attr->mod_hdr_actions. Rejects set+add on the same field, running
 * out of action slots, and partial-field rewrites (mask must cover the
 * whole field). On success writes the number of emitted actions back to
 * parse_attr->num_mod_hdr_actions.
 * NOTE(review): mangled extraction — 'action', 'cmd', 'mask', 'field_bsize'
 * declarations, continue/return statements, the mask-combining lines and
 * several braces are missing from this view.
 */
802 /* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at
803 * max from the SW pedit action. On success, it says how many HW actions were
806 static int offload_pedit_fields(struct pedit_headers
*masks
,
807 struct pedit_headers
*vals
,
808 struct mlx5e_tc_flow_parse_attr
*parse_attr
)
810 struct pedit_headers
*set_masks
, *add_masks
, *set_vals
, *add_vals
;
811 int i
, action_size
, nactions
, max_actions
, first
, last
;
812 void *s_masks_p
, *a_masks_p
, *vals_p
;
813 u32 s_mask
, a_mask
, val
;
814 struct mlx5_fields
*f
;
819 set_masks
= &masks
[TCA_PEDIT_KEY_EX_CMD_SET
];
820 add_masks
= &masks
[TCA_PEDIT_KEY_EX_CMD_ADD
];
821 set_vals
= &vals
[TCA_PEDIT_KEY_EX_CMD_SET
];
822 add_vals
= &vals
[TCA_PEDIT_KEY_EX_CMD_ADD
];
824 action_size
= MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto
);
825 action
= parse_attr
->mod_hdr_actions
;
826 max_actions
= parse_attr
->num_mod_hdr_actions
;
829 for (i
= 0; i
< ARRAY_SIZE(fields
); i
++) {
831 /* avoid seeing bits set from previous iterations */
832 s_mask
= a_mask
= mask
= val
= 0;
834 s_masks_p
= (void *)set_masks
+ f
->offset
;
835 a_masks_p
= (void *)add_masks
+ f
->offset
;
/* memcpy per f->size so 1/2/4-byte fields read correctly. */
837 memcpy(&s_mask
, s_masks_p
, f
->size
);
838 memcpy(&a_mask
, a_masks_p
, f
->size
);
840 if (!s_mask
&& !a_mask
) /* nothing to offload here */
843 if (s_mask
&& a_mask
) {
844 printk(KERN_WARNING
"mlx5: can't set and add to the same HW field (%x)\n", f
->field
);
848 if (nactions
== max_actions
) {
849 printk(KERN_WARNING
"mlx5: parsed %d pedit actions, can't do more\n", nactions
);
854 cmd
= MLX5_ACTION_TYPE_SET
;
856 vals_p
= (void *)set_vals
+ f
->offset
;
857 /* clear to denote we consumed this field */
858 memset(s_masks_p
, 0, f
->size
);
860 cmd
= MLX5_ACTION_TYPE_ADD
;
862 vals_p
= (void *)add_vals
+ f
->offset
;
863 /* clear to denote we consumed this field */
864 memset(a_masks_p
, 0, f
->size
);
867 memcpy(&val
, vals_p
, f
->size
);
869 field_bsize
= f
->size
* BITS_PER_BYTE
;
/* Only whole-field rewrites offload: mask must be a solid run
 * from bit 0 to the field's top bit. */
870 first
= find_first_bit(&mask
, field_bsize
);
871 last
= find_last_bit(&mask
, field_bsize
);
872 if (first
> 0 || last
!= (field_bsize
- 1)) {
873 printk(KERN_WARNING
"mlx5: partial rewrite (mask %lx) is currently not offloaded\n",
878 MLX5_SET(set_action_in
, action
, action_type
, cmd
);
879 MLX5_SET(set_action_in
, action
, field
, f
->field
);
881 if (cmd
== MLX5_ACTION_TYPE_SET
) {
882 MLX5_SET(set_action_in
, action
, offset
, 0);
883 /* length is num of bits to be written, zero means length of 32 */
884 MLX5_SET(set_action_in
, action
, length
, field_bsize
);
/* Values arrive in network byte order; convert per field width. */
887 if (field_bsize
== 32)
888 MLX5_SET(set_action_in
, action
, data
, ntohl(val
));
889 else if (field_bsize
== 16)
890 MLX5_SET(set_action_in
, action
, data
, ntohs(val
));
891 else if (field_bsize
== 8)
892 MLX5_SET(set_action_in
, action
, data
, val
);
894 action
+= action_size
;
898 parse_attr
->num_mod_hdr_actions
= nactions
;
/* Size and allocate the FW modify-header action buffer for a pedit
 * action: bounded by the device capability for the target namespace
 * (FDB vs NIC RX) and by 16 HW actions per SW pedit key. Stores the
 * buffer and its capacity in parse_attr.
 * NOTE(review): mangled extraction — the -ENOMEM return and the final
 * 'return 0' are missing from this view.
 */
902 static int alloc_mod_hdr_actions(struct mlx5e_priv
*priv
,
903 const struct tc_action
*a
, int namespace,
904 struct mlx5e_tc_flow_parse_attr
*parse_attr
)
906 int nkeys
, action_size
, max_actions
;
908 nkeys
= tcf_pedit_nkeys(a
);
909 action_size
= MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto
);
911 if (namespace == MLX5_FLOW_NAMESPACE_FDB
) /* FDB offloading */
912 max_actions
= MLX5_CAP_ESW_FLOWTABLE_FDB(priv
->mdev
, max_modify_header_actions
);
913 else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
914 max_actions
= MLX5_CAP_FLOWTABLE_NIC_RX(priv
->mdev
, max_modify_header_actions
);
916 /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */
917 max_actions
= min(max_actions
, nkeys
* 16);
919 parse_attr
->mod_hdr_actions
= kcalloc(max_actions
, action_size
, GFP_KERNEL
);
920 if (!parse_attr
->mod_hdr_actions
)
923 parse_attr
->num_mod_hdr_actions
= max_actions
;
/* All-zero reference used to verify every parsed pedit mask was consumed
 * by offload_pedit_fields(). */
927 static const struct pedit_headers zero_masks
= {};
/* Parse a tc pedit action into per-command masks/vals scratch structs,
 * allocate the FW action buffer, translate to HW actions, then verify no
 * unsupported field bits remain unconsumed (any leftover mask means a
 * field the HW cannot rewrite). Uses extended pedit keys only; legacy
 * (TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) keys are rejected.
 * NOTE(review): mangled extraction — 'htype'/'cmd' declarations, several
 * goto/return statements and closing braces are missing from this view.
 */
929 static int parse_tc_pedit_action(struct mlx5e_priv
*priv
,
930 const struct tc_action
*a
, int namespace,
931 struct mlx5e_tc_flow_parse_attr
*parse_attr
)
933 struct pedit_headers masks
[__PEDIT_CMD_MAX
], vals
[__PEDIT_CMD_MAX
], *cmd_masks
;
934 int nkeys
, i
, err
= -EOPNOTSUPP
;
935 u32 mask
, val
, offset
;
938 nkeys
= tcf_pedit_nkeys(a
);
940 memset(masks
, 0, sizeof(struct pedit_headers
) * __PEDIT_CMD_MAX
);
941 memset(vals
, 0, sizeof(struct pedit_headers
) * __PEDIT_CMD_MAX
);
943 for (i
= 0; i
< nkeys
; i
++) {
944 htype
= tcf_pedit_htype(a
, i
);
945 cmd
= tcf_pedit_cmd(a
, i
);
946 err
= -EOPNOTSUPP
; /* can't be all optimistic */
948 if (htype
== TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK
) {
949 printk(KERN_WARNING
"mlx5: legacy pedit isn't offloaded\n");
953 if (cmd
!= TCA_PEDIT_KEY_EX_CMD_SET
&& cmd
!= TCA_PEDIT_KEY_EX_CMD_ADD
) {
954 printk(KERN_WARNING
"mlx5: pedit cmd %d isn't offloaded\n", cmd
);
958 mask
= tcf_pedit_mask(a
, i
);
959 val
= tcf_pedit_val(a
, i
);
960 offset
= tcf_pedit_offset(a
, i
);
/* pedit masks are inverted (bits to keep); flip for our use. */
962 err
= set_pedit_val(htype
, ~mask
, val
, offset
, &masks
[cmd
], &vals
[cmd
]);
967 err
= alloc_mod_hdr_actions(priv
, a
, namespace, parse_attr
);
971 err
= offload_pedit_fields(masks
, vals
, parse_attr
);
973 goto out_dealloc_parsed_actions
;
/* Any mask bits not cleared by offload_pedit_fields target a field
 * the HW can't rewrite — refuse the whole action. */
975 for (cmd
= 0; cmd
< __PEDIT_CMD_MAX
; cmd
++) {
976 cmd_masks
= &masks
[cmd
];
977 if (memcmp(cmd_masks
, &zero_masks
, sizeof(zero_masks
))) {
978 printk(KERN_WARNING
"mlx5: attempt to offload an unsupported field (cmd %d)\n",
980 print_hex_dump(KERN_WARNING
, "mask: ", DUMP_PREFIX_ADDRESS
,
981 16, 1, cmd_masks
, sizeof(zero_masks
), true);
983 goto out_dealloc_parsed_actions
;
989 out_dealloc_parsed_actions
:
990 kfree(parse_attr
->mod_hdr_actions
);
/* Translate the tc action list of a NIC-offloaded filter into
 * mlx5_nic_flow_attr: gact drop (plus a counter when supported), pedit
 * header rewrite, and skbedit mark (flow tag, limited to 16 bits).
 * NOTE(review): mangled extraction — 'actions' list head, 'err'
 * declaration, returns and the final unsupported-action path are missing
 * from this view.
 */
995 static int parse_tc_nic_actions(struct mlx5e_priv
*priv
, struct tcf_exts
*exts
,
996 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
997 struct mlx5e_tc_flow
*flow
)
999 struct mlx5_nic_flow_attr
*attr
= flow
->nic_attr
;
1000 const struct tc_action
*a
;
1004 if (tc_no_actions(exts
))
1007 attr
->flow_tag
= MLX5_FS_DEFAULT_FLOW_TAG
;
1010 tcf_exts_to_list(exts
, &actions
);
1011 list_for_each_entry(a
, &actions
, list
) {
1012 /* Only support a single action per rule */
1016 if (is_tcf_gact_shot(a
)) {
1017 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_DROP
;
1018 if (MLX5_CAP_FLOWTABLE(priv
->mdev
,
1019 flow_table_properties_nic_receive
.flow_counter
))
1020 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_COUNT
;
1024 if (is_tcf_pedit(a
)) {
1025 err
= parse_tc_pedit_action(priv
, a
, MLX5_FLOW_NAMESPACE_KERNEL
,
1030 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
|
1031 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
1035 if (is_tcf_skbedit_mark(a
)) {
1036 u32 mark
= tcf_skbedit_mark(a
);
/* flow_tag is only 16 bits wide in HW. */
1038 if (mark
& ~MLX5E_TC_FLOW_ID_MASK
) {
1039 netdev_warn(priv
->netdev
, "Bad flow mark - only 16 bit is supported: 0x%x\n",
1044 attr
->flow_tag
= mark
;
1045 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
/* Compare/hash helpers for the encap-entry hashtable, keyed by the whole
 * ip_tunnel_key. NOTE(review): memcmp over the full struct assumes any
 * padding is consistently zeroed — presumably the keys come memset from
 * the tunnel code; confirm at the call sites (not visible here).
 */
1055 static inline int cmp_encap_info(struct ip_tunnel_key
*a
,
1056 struct ip_tunnel_key
*b
)
1058 return memcmp(a
, b
, sizeof(*a
));
1061 static inline int hash_encap_info(struct ip_tunnel_key
*key
)
1063 return jhash(key
, sizeof(*key
), 0);
/* Resolve the IPv4 route for an encap destination: output device (falling
 * back to the eswitch uplink when the egress netdev is on a different HW
 * switch), TTL, and next-hop neighbour entry.
 * Only compiled meaningfully with CONFIG_INET; otherwise presumably
 * returns an error (the #else branch is not visible here).
 * NOTE(review): mangled extraction — 'rt'/'ret'/'fl4'/'out_ttl'
 * declarations, error checks, ip_rt_put and returns are missing.
 */
1066 static int mlx5e_route_lookup_ipv4(struct mlx5e_priv
*priv
,
1067 struct net_device
*mirred_dev
,
1068 struct net_device
**out_dev
,
1070 struct neighbour
**out_n
,
1073 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1075 struct neighbour
*n
= NULL
;
1077 #if IS_ENABLED(CONFIG_INET)
1080 rt
= ip_route_output_key(dev_net(mirred_dev
), fl4
);
1081 ret
= PTR_ERR_OR_ZERO(rt
);
1087 /* if the egress device isn't on the same HW e-switch, we use the uplink */
1088 if (!switchdev_port_same_parent_id(priv
->netdev
, rt
->dst
.dev
))
1089 *out_dev
= mlx5_eswitch_get_uplink_netdev(esw
);
1091 *out_dev
= rt
->dst
.dev
;
1093 *out_ttl
= ip4_dst_hoplimit(&rt
->dst
);
1094 n
= dst_neigh_lookup(&rt
->dst
, &fl4
->daddr
);
1103 static int mlx5e_route_lookup_ipv6(struct mlx5e_priv
*priv
,
1104 struct net_device
*mirred_dev
,
1105 struct net_device
**out_dev
,
1107 struct neighbour
**out_n
,
1110 struct neighbour
*n
= NULL
;
1111 struct dst_entry
*dst
;
1113 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
1114 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1117 dst
= ip6_route_output(dev_net(mirred_dev
), NULL
, fl6
);
1124 *out_ttl
= ip6_dst_hoplimit(dst
);
1126 /* if the egress device isn't on the same HW e-switch, we use the uplink */
1127 if (!switchdev_port_same_parent_id(priv
->netdev
, dst
->dev
))
1128 *out_dev
= mlx5_eswitch_get_uplink_netdev(esw
);
1130 *out_dev
= dst
->dev
;
1135 n
= dst_neigh_lookup(dst
, &fl6
->daddr
);
1144 static void gen_vxlan_header_ipv4(struct net_device
*out_dev
,
1145 char buf
[], int encap_size
,
1146 unsigned char h_dest
[ETH_ALEN
],
1150 __be16 udp_dst_port
,
1153 struct ethhdr
*eth
= (struct ethhdr
*)buf
;
1154 struct iphdr
*ip
= (struct iphdr
*)((char *)eth
+ sizeof(struct ethhdr
));
1155 struct udphdr
*udp
= (struct udphdr
*)((char *)ip
+ sizeof(struct iphdr
));
1156 struct vxlanhdr
*vxh
= (struct vxlanhdr
*)((char *)udp
+ sizeof(struct udphdr
));
1158 memset(buf
, 0, encap_size
);
1160 ether_addr_copy(eth
->h_dest
, h_dest
);
1161 ether_addr_copy(eth
->h_source
, out_dev
->dev_addr
);
1162 eth
->h_proto
= htons(ETH_P_IP
);
1168 ip
->protocol
= IPPROTO_UDP
;
1172 udp
->dest
= udp_dst_port
;
1173 vxh
->vx_flags
= VXLAN_HF_VNI
;
1174 vxh
->vx_vni
= vxlan_vni_field(vx_vni
);
1177 static void gen_vxlan_header_ipv6(struct net_device
*out_dev
,
1178 char buf
[], int encap_size
,
1179 unsigned char h_dest
[ETH_ALEN
],
1181 struct in6_addr
*daddr
,
1182 struct in6_addr
*saddr
,
1183 __be16 udp_dst_port
,
1186 struct ethhdr
*eth
= (struct ethhdr
*)buf
;
1187 struct ipv6hdr
*ip6h
= (struct ipv6hdr
*)((char *)eth
+ sizeof(struct ethhdr
));
1188 struct udphdr
*udp
= (struct udphdr
*)((char *)ip6h
+ sizeof(struct ipv6hdr
));
1189 struct vxlanhdr
*vxh
= (struct vxlanhdr
*)((char *)udp
+ sizeof(struct udphdr
));
1191 memset(buf
, 0, encap_size
);
1193 ether_addr_copy(eth
->h_dest
, h_dest
);
1194 ether_addr_copy(eth
->h_source
, out_dev
->dev_addr
);
1195 eth
->h_proto
= htons(ETH_P_IPV6
);
1197 ip6_flow_hdr(ip6h
, 0, 0);
1198 /* the HW fills up ipv6 payload len */
1199 ip6h
->nexthdr
= IPPROTO_UDP
;
1200 ip6h
->hop_limit
= ttl
;
1201 ip6h
->daddr
= *daddr
;
1202 ip6h
->saddr
= *saddr
;
1204 udp
->dest
= udp_dst_port
;
1205 vxh
->vx_flags
= VXLAN_HF_VNI
;
1206 vxh
->vx_vni
= vxlan_vni_field(vx_vni
);
1209 static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv
*priv
,
1210 struct net_device
*mirred_dev
,
1211 struct mlx5_encap_entry
*e
,
1212 struct net_device
**out_dev
)
1214 int max_encap_size
= MLX5_CAP_ESW(priv
->mdev
, max_encap_header_size
);
1215 int ipv4_encap_size
= ETH_HLEN
+ sizeof(struct iphdr
) + VXLAN_HLEN
;
1216 struct ip_tunnel_key
*tun_key
= &e
->tun_info
.key
;
1217 struct neighbour
*n
= NULL
;
1218 struct flowi4 fl4
= {};
1222 if (max_encap_size
< ipv4_encap_size
) {
1223 mlx5_core_warn(priv
->mdev
, "encap size %d too big, max supported is %d\n",
1224 ipv4_encap_size
, max_encap_size
);
1228 encap_header
= kzalloc(ipv4_encap_size
, GFP_KERNEL
);
1232 switch (e
->tunnel_type
) {
1233 case MLX5_HEADER_TYPE_VXLAN
:
1234 fl4
.flowi4_proto
= IPPROTO_UDP
;
1235 fl4
.fl4_dport
= tun_key
->tp_dst
;
1241 fl4
.flowi4_tos
= tun_key
->tos
;
1242 fl4
.daddr
= tun_key
->u
.ipv4
.dst
;
1243 fl4
.saddr
= tun_key
->u
.ipv4
.src
;
1245 err
= mlx5e_route_lookup_ipv4(priv
, mirred_dev
, out_dev
,
1250 if (!(n
->nud_state
& NUD_VALID
)) {
1251 pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__
, &fl4
.daddr
);
1257 e
->out_dev
= *out_dev
;
1259 neigh_ha_snapshot(e
->h_dest
, n
, *out_dev
);
1261 switch (e
->tunnel_type
) {
1262 case MLX5_HEADER_TYPE_VXLAN
:
1263 gen_vxlan_header_ipv4(*out_dev
, encap_header
,
1264 ipv4_encap_size
, e
->h_dest
, ttl
,
1266 fl4
.saddr
, tun_key
->tp_dst
,
1267 tunnel_id_to_key32(tun_key
->tun_id
));
1274 err
= mlx5_encap_alloc(priv
->mdev
, e
->tunnel_type
,
1275 ipv4_encap_size
, encap_header
, &e
->encap_id
);
1279 kfree(encap_header
);
1283 static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv
*priv
,
1284 struct net_device
*mirred_dev
,
1285 struct mlx5_encap_entry
*e
,
1286 struct net_device
**out_dev
)
1289 int max_encap_size
= MLX5_CAP_ESW(priv
->mdev
, max_encap_header_size
);
1290 int ipv6_encap_size
= ETH_HLEN
+ sizeof(struct ipv6hdr
) + VXLAN_HLEN
;
1291 struct ip_tunnel_key
*tun_key
= &e
->tun_info
.key
;
1292 struct neighbour
*n
= NULL
;
1293 struct flowi6 fl6
= {};
1297 if (max_encap_size
< ipv6_encap_size
) {
1298 mlx5_core_warn(priv
->mdev
, "encap size %d too big, max supported is %d\n",
1299 ipv6_encap_size
, max_encap_size
);
1303 encap_header
= kzalloc(ipv6_encap_size
, GFP_KERNEL
);
1307 switch (e
->tunnel_type
) {
1308 case MLX5_HEADER_TYPE_VXLAN
:
1309 fl6
.flowi6_proto
= IPPROTO_UDP
;
1310 fl6
.fl6_dport
= tun_key
->tp_dst
;
1317 fl6
.flowlabel
= ip6_make_flowinfo(RT_TOS(tun_key
->tos
), tun_key
->label
);
1318 fl6
.daddr
= tun_key
->u
.ipv6
.dst
;
1319 fl6
.saddr
= tun_key
->u
.ipv6
.src
;
1321 err
= mlx5e_route_lookup_ipv6(priv
, mirred_dev
, out_dev
,
1326 if (!(n
->nud_state
& NUD_VALID
)) {
1327 pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__
, &fl6
.daddr
);
1333 e
->out_dev
= *out_dev
;
1335 neigh_ha_snapshot(e
->h_dest
, n
, *out_dev
);
1337 switch (e
->tunnel_type
) {
1338 case MLX5_HEADER_TYPE_VXLAN
:
1339 gen_vxlan_header_ipv6(*out_dev
, encap_header
,
1340 ipv6_encap_size
, e
->h_dest
, ttl
,
1342 &fl6
.saddr
, tun_key
->tp_dst
,
1343 tunnel_id_to_key32(tun_key
->tun_id
));
1350 err
= mlx5_encap_alloc(priv
->mdev
, e
->tunnel_type
,
1351 ipv6_encap_size
, encap_header
, &e
->encap_id
);
1355 kfree(encap_header
);
1359 static int mlx5e_attach_encap(struct mlx5e_priv
*priv
,
1360 struct ip_tunnel_info
*tun_info
,
1361 struct net_device
*mirred_dev
,
1362 struct mlx5_esw_flow_attr
*attr
)
1364 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1365 struct net_device
*up_dev
= mlx5_eswitch_get_uplink_netdev(esw
);
1366 struct mlx5e_priv
*up_priv
= netdev_priv(up_dev
);
1367 unsigned short family
= ip_tunnel_info_af(tun_info
);
1368 struct ip_tunnel_key
*key
= &tun_info
->key
;
1369 struct mlx5_encap_entry
*e
;
1370 struct net_device
*out_dev
;
1371 int tunnel_type
, err
= -EOPNOTSUPP
;
1375 /* udp dst port must be set */
1376 if (!memchr_inv(&key
->tp_dst
, 0, sizeof(key
->tp_dst
)))
1377 goto vxlan_encap_offload_err
;
1379 /* setting udp src port isn't supported */
1380 if (memchr_inv(&key
->tp_src
, 0, sizeof(key
->tp_src
))) {
1381 vxlan_encap_offload_err
:
1382 netdev_warn(priv
->netdev
,
1383 "must set udp dst port and not set udp src port\n");
1387 if (mlx5e_vxlan_lookup_port(up_priv
, be16_to_cpu(key
->tp_dst
)) &&
1388 MLX5_CAP_ESW(priv
->mdev
, vxlan_encap_decap
)) {
1389 tunnel_type
= MLX5_HEADER_TYPE_VXLAN
;
1391 netdev_warn(priv
->netdev
,
1392 "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key
->tp_dst
));
1396 hash_key
= hash_encap_info(key
);
1398 hash_for_each_possible_rcu(esw
->offloads
.encap_tbl
, e
,
1399 encap_hlist
, hash_key
) {
1400 if (!cmp_encap_info(&e
->tun_info
.key
, key
)) {
1411 e
= kzalloc(sizeof(*e
), GFP_KERNEL
);
1415 e
->tun_info
= *tun_info
;
1416 e
->tunnel_type
= tunnel_type
;
1417 INIT_LIST_HEAD(&e
->flows
);
1419 if (family
== AF_INET
)
1420 err
= mlx5e_create_encap_header_ipv4(priv
, mirred_dev
, e
, &out_dev
);
1421 else if (family
== AF_INET6
)
1422 err
= mlx5e_create_encap_header_ipv6(priv
, mirred_dev
, e
, &out_dev
);
1428 hash_add_rcu(esw
->offloads
.encap_tbl
, &e
->encap_hlist
, hash_key
);
1437 static int parse_tc_fdb_actions(struct mlx5e_priv
*priv
, struct tcf_exts
*exts
,
1438 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
1439 struct mlx5e_tc_flow
*flow
)
1441 struct mlx5_esw_flow_attr
*attr
= flow
->esw_attr
;
1442 struct ip_tunnel_info
*info
= NULL
;
1443 const struct tc_action
*a
;
1448 if (tc_no_actions(exts
))
1451 memset(attr
, 0, sizeof(*attr
));
1452 attr
->in_rep
= priv
->ppriv
;
1454 tcf_exts_to_list(exts
, &actions
);
1455 list_for_each_entry(a
, &actions
, list
) {
1456 if (is_tcf_gact_shot(a
)) {
1457 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_DROP
|
1458 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
1462 if (is_tcf_pedit(a
)) {
1463 err
= parse_tc_pedit_action(priv
, a
, MLX5_FLOW_NAMESPACE_FDB
,
1468 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
;
1472 if (is_tcf_mirred_egress_redirect(a
)) {
1473 int ifindex
= tcf_mirred_ifindex(a
);
1474 struct net_device
*out_dev
;
1475 struct mlx5e_priv
*out_priv
;
1477 out_dev
= __dev_get_by_index(dev_net(priv
->netdev
), ifindex
);
1479 if (switchdev_port_same_parent_id(priv
->netdev
,
1481 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
|
1482 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
1483 out_priv
= netdev_priv(out_dev
);
1484 attr
->out_rep
= out_priv
->ppriv
;
1486 err
= mlx5e_attach_encap(priv
, info
,
1490 list_add(&flow
->encap
, &attr
->encap
->flows
);
1491 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_ENCAP
|
1492 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
|
1493 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
1494 out_priv
= netdev_priv(attr
->encap
->out_dev
);
1495 attr
->out_rep
= out_priv
->ppriv
;
1497 pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
1498 priv
->netdev
->name
, out_dev
->name
);
1504 if (is_tcf_tunnel_set(a
)) {
1505 info
= tcf_tunnel_info(a
);
1513 if (is_tcf_vlan(a
)) {
1514 if (tcf_vlan_action(a
) == TCA_VLAN_ACT_POP
) {
1515 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
;
1516 } else if (tcf_vlan_action(a
) == TCA_VLAN_ACT_PUSH
) {
1517 if (tcf_vlan_push_proto(a
) != htons(ETH_P_8021Q
))
1520 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH
;
1521 attr
->vlan
= tcf_vlan_push_vid(a
);
1522 } else { /* action is TCA_VLAN_ACT_MODIFY */
1528 if (is_tcf_tunnel_release(a
)) {
1529 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_DECAP
;
1538 int mlx5e_configure_flower(struct mlx5e_priv
*priv
, __be16 protocol
,
1539 struct tc_cls_flower_offload
*f
)
1541 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1542 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
1543 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
1544 struct mlx5e_tc_flow
*flow
;
1545 int attr_size
, err
= 0;
1548 if (esw
&& esw
->mode
== SRIOV_OFFLOADS
) {
1549 flow_flags
= MLX5E_TC_FLOW_ESWITCH
;
1550 attr_size
= sizeof(struct mlx5_esw_flow_attr
);
1552 flow_flags
= MLX5E_TC_FLOW_NIC
;
1553 attr_size
= sizeof(struct mlx5_nic_flow_attr
);
1556 flow
= kzalloc(sizeof(*flow
) + attr_size
, GFP_KERNEL
);
1557 parse_attr
= mlx5_vzalloc(sizeof(*parse_attr
));
1558 if (!parse_attr
|| !flow
) {
1563 flow
->cookie
= f
->cookie
;
1564 flow
->flags
= flow_flags
;
1566 err
= parse_cls_flower(priv
, flow
, &parse_attr
->spec
, f
);
1570 if (flow
->flags
& MLX5E_TC_FLOW_ESWITCH
) {
1571 err
= parse_tc_fdb_actions(priv
, f
->exts
, parse_attr
, flow
);
1574 flow
->rule
= mlx5e_tc_add_fdb_flow(priv
, parse_attr
, flow
);
1576 err
= parse_tc_nic_actions(priv
, f
->exts
, parse_attr
, flow
);
1579 flow
->rule
= mlx5e_tc_add_nic_flow(priv
, parse_attr
, flow
);
1582 if (IS_ERR(flow
->rule
)) {
1583 err
= PTR_ERR(flow
->rule
);
1587 err
= rhashtable_insert_fast(&tc
->ht
, &flow
->node
,
1595 mlx5e_tc_del_flow(priv
, flow
);
1604 int mlx5e_delete_flower(struct mlx5e_priv
*priv
,
1605 struct tc_cls_flower_offload
*f
)
1607 struct mlx5e_tc_flow
*flow
;
1608 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
1610 flow
= rhashtable_lookup_fast(&tc
->ht
, &f
->cookie
,
1615 rhashtable_remove_fast(&tc
->ht
, &flow
->node
, tc
->ht_params
);
1617 mlx5e_tc_del_flow(priv
, flow
);
1625 int mlx5e_stats_flower(struct mlx5e_priv
*priv
,
1626 struct tc_cls_flower_offload
*f
)
1628 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
1629 struct mlx5e_tc_flow
*flow
;
1630 struct tc_action
*a
;
1631 struct mlx5_fc
*counter
;
1637 flow
= rhashtable_lookup_fast(&tc
->ht
, &f
->cookie
,
1642 counter
= mlx5_flow_rule_counter(flow
->rule
);
1646 mlx5_fc_query_cached(counter
, &bytes
, &packets
, &lastuse
);
1650 tcf_exts_to_list(f
->exts
, &actions
);
1651 list_for_each_entry(a
, &actions
, list
)
1652 tcf_action_stats_update(a
, bytes
, packets
, lastuse
);
1659 static const struct rhashtable_params mlx5e_tc_flow_ht_params
= {
1660 .head_offset
= offsetof(struct mlx5e_tc_flow
, node
),
1661 .key_offset
= offsetof(struct mlx5e_tc_flow
, cookie
),
1662 .key_len
= sizeof(((struct mlx5e_tc_flow
*)0)->cookie
),
1663 .automatic_shrinking
= true,
1666 int mlx5e_tc_init(struct mlx5e_priv
*priv
)
1668 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
1670 tc
->ht_params
= mlx5e_tc_flow_ht_params
;
1671 return rhashtable_init(&tc
->ht
, &tc
->ht_params
);
1674 static void _mlx5e_tc_del_flow(void *ptr
, void *arg
)
1676 struct mlx5e_tc_flow
*flow
= ptr
;
1677 struct mlx5e_priv
*priv
= arg
;
1679 mlx5e_tc_del_flow(priv
, flow
);
1683 void mlx5e_tc_cleanup(struct mlx5e_priv
*priv
)
1685 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
1687 rhashtable_free_and_destroy(&tc
->ht
, _mlx5e_tc_del_flow
, priv
);
1689 if (!IS_ERR_OR_NULL(tc
->t
)) {
1690 mlx5_destroy_flow_table(tc
->t
);