/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <net/flow_dissector.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <net/switchdev.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/vxlan.h>
/* NOTE(review): driver-local headers (en.h, en_tc.h, eswitch.h, vxlan.h)
 * appear to be missing from the extracted source; the code below needs
 * them for mlx5e_priv, mlx5_eswitch etc. — confirm against the repo.
 */
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"
#include "vxlan.h"
52 struct mlx5_nic_flow_attr
{
59 MLX5E_TC_FLOW_ESWITCH
= BIT(0),
60 MLX5E_TC_FLOW_NIC
= BIT(1),
63 struct mlx5e_tc_flow
{
64 struct rhash_head node
;
67 struct mlx5_flow_handle
*rule
;
68 struct list_head encap
; /* flows sharing the same encap */
70 struct mlx5_esw_flow_attr esw_attr
[0];
71 struct mlx5_nic_flow_attr nic_attr
[0];
75 struct mlx5e_tc_flow_parse_attr
{
76 struct mlx5_flow_spec spec
;
77 int num_mod_hdr_actions
;
78 void *mod_hdr_actions
;
82 MLX5_HEADER_TYPE_VXLAN
= 0x0,
83 MLX5_HEADER_TYPE_NVGRE
= 0x1,
86 #define MLX5E_TC_TABLE_NUM_ENTRIES 1024
87 #define MLX5E_TC_TABLE_NUM_GROUPS 4
89 static struct mlx5_flow_handle
*
90 mlx5e_tc_add_nic_flow(struct mlx5e_priv
*priv
,
91 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
92 struct mlx5e_tc_flow
*flow
)
94 struct mlx5_nic_flow_attr
*attr
= flow
->nic_attr
;
95 struct mlx5_core_dev
*dev
= priv
->mdev
;
96 struct mlx5_flow_destination dest
= {};
97 struct mlx5_flow_act flow_act
= {
98 .action
= attr
->action
,
99 .flow_tag
= attr
->flow_tag
,
102 struct mlx5_fc
*counter
= NULL
;
103 struct mlx5_flow_handle
*rule
;
104 bool table_created
= false;
107 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
) {
108 dest
.type
= MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE
;
109 dest
.ft
= priv
->fs
.vlan
.ft
.t
;
110 } else if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_COUNT
) {
111 counter
= mlx5_fc_create(dev
, true);
113 return ERR_CAST(counter
);
115 dest
.type
= MLX5_FLOW_DESTINATION_TYPE_COUNTER
;
116 dest
.counter
= counter
;
119 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
) {
120 err
= mlx5_modify_header_alloc(dev
, MLX5_FLOW_NAMESPACE_KERNEL
,
121 parse_attr
->num_mod_hdr_actions
,
122 parse_attr
->mod_hdr_actions
,
124 flow_act
.modify_id
= attr
->mod_hdr_id
;
125 kfree(parse_attr
->mod_hdr_actions
);
128 goto err_create_mod_hdr_id
;
132 if (IS_ERR_OR_NULL(priv
->fs
.tc
.t
)) {
134 mlx5_create_auto_grouped_flow_table(priv
->fs
.ns
,
136 MLX5E_TC_TABLE_NUM_ENTRIES
,
137 MLX5E_TC_TABLE_NUM_GROUPS
,
139 if (IS_ERR(priv
->fs
.tc
.t
)) {
140 netdev_err(priv
->netdev
,
141 "Failed to create tc offload table\n");
142 rule
= ERR_CAST(priv
->fs
.tc
.t
);
146 table_created
= true;
149 parse_attr
->spec
.match_criteria_enable
= MLX5_MATCH_OUTER_HEADERS
;
150 rule
= mlx5_add_flow_rules(priv
->fs
.tc
.t
, &parse_attr
->spec
,
151 &flow_act
, &dest
, 1);
160 mlx5_destroy_flow_table(priv
->fs
.tc
.t
);
161 priv
->fs
.tc
.t
= NULL
;
164 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)
165 mlx5_modify_header_dealloc(priv
->mdev
,
167 err_create_mod_hdr_id
:
168 mlx5_fc_destroy(dev
, counter
);
173 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv
*priv
,
174 struct mlx5e_tc_flow
*flow
)
176 struct mlx5_fc
*counter
= NULL
;
178 counter
= mlx5_flow_rule_counter(flow
->rule
);
179 mlx5_del_flow_rules(flow
->rule
);
180 mlx5_fc_destroy(priv
->mdev
, counter
);
182 if (!mlx5e_tc_num_filters(priv
) && (priv
->fs
.tc
.t
)) {
183 mlx5_destroy_flow_table(priv
->fs
.tc
.t
);
184 priv
->fs
.tc
.t
= NULL
;
187 if (flow
->nic_attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)
188 mlx5_modify_header_dealloc(priv
->mdev
,
189 flow
->nic_attr
->mod_hdr_id
);
192 static void mlx5e_detach_encap(struct mlx5e_priv
*priv
,
193 struct mlx5e_tc_flow
*flow
);
195 static struct mlx5_flow_handle
*
196 mlx5e_tc_add_fdb_flow(struct mlx5e_priv
*priv
,
197 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
198 struct mlx5e_tc_flow
*flow
)
200 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
201 struct mlx5_esw_flow_attr
*attr
= flow
->esw_attr
;
202 struct mlx5_flow_handle
*rule
;
205 err
= mlx5_eswitch_add_vlan_action(esw
, attr
);
211 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
) {
212 err
= mlx5_modify_header_alloc(priv
->mdev
, MLX5_FLOW_NAMESPACE_FDB
,
213 parse_attr
->num_mod_hdr_actions
,
214 parse_attr
->mod_hdr_actions
,
216 kfree(parse_attr
->mod_hdr_actions
);
223 rule
= mlx5_eswitch_add_offloaded_rule(esw
, &parse_attr
->spec
, attr
);
230 if (flow
->esw_attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)
231 mlx5_modify_header_dealloc(priv
->mdev
,
234 mlx5_eswitch_del_vlan_action(esw
, attr
);
236 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_ENCAP
)
237 mlx5e_detach_encap(priv
, flow
);
241 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv
*priv
,
242 struct mlx5e_tc_flow
*flow
)
244 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
245 struct mlx5_esw_flow_attr
*attr
= flow
->esw_attr
;
247 mlx5_eswitch_del_offloaded_rule(esw
, flow
->rule
, flow
->esw_attr
);
249 mlx5_eswitch_del_vlan_action(esw
, flow
->esw_attr
);
251 if (flow
->esw_attr
->action
& MLX5_FLOW_CONTEXT_ACTION_ENCAP
)
252 mlx5e_detach_encap(priv
, flow
);
254 if (flow
->esw_attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)
255 mlx5_modify_header_dealloc(priv
->mdev
,
259 static void mlx5e_detach_encap(struct mlx5e_priv
*priv
,
260 struct mlx5e_tc_flow
*flow
)
262 struct list_head
*next
= flow
->encap
.next
;
264 list_del(&flow
->encap
);
265 if (list_empty(next
)) {
266 struct mlx5_encap_entry
*e
;
268 e
= list_entry(next
, struct mlx5_encap_entry
, flows
);
270 mlx5_encap_dealloc(priv
->mdev
, e
->encap_id
);
273 hlist_del_rcu(&e
->encap_hlist
);
278 static void mlx5e_tc_del_flow(struct mlx5e_priv
*priv
,
279 struct mlx5e_tc_flow
*flow
)
281 if (flow
->flags
& MLX5E_TC_FLOW_ESWITCH
)
282 mlx5e_tc_del_fdb_flow(priv
, flow
);
284 mlx5e_tc_del_nic_flow(priv
, flow
);
287 static void parse_vxlan_attr(struct mlx5_flow_spec
*spec
,
288 struct tc_cls_flower_offload
*f
)
290 void *headers_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
292 void *headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
294 void *misc_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
296 void *misc_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
299 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4
, headers_c
, ip_protocol
);
300 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ip_protocol
, IPPROTO_UDP
);
302 if (dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_ENC_KEYID
)) {
303 struct flow_dissector_key_keyid
*key
=
304 skb_flow_dissector_target(f
->dissector
,
305 FLOW_DISSECTOR_KEY_ENC_KEYID
,
307 struct flow_dissector_key_keyid
*mask
=
308 skb_flow_dissector_target(f
->dissector
,
309 FLOW_DISSECTOR_KEY_ENC_KEYID
,
311 MLX5_SET(fte_match_set_misc
, misc_c
, vxlan_vni
,
312 be32_to_cpu(mask
->keyid
));
313 MLX5_SET(fte_match_set_misc
, misc_v
, vxlan_vni
,
314 be32_to_cpu(key
->keyid
));
318 static int parse_tunnel_attr(struct mlx5e_priv
*priv
,
319 struct mlx5_flow_spec
*spec
,
320 struct tc_cls_flower_offload
*f
)
322 void *headers_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
324 void *headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
327 struct flow_dissector_key_control
*enc_control
=
328 skb_flow_dissector_target(f
->dissector
,
329 FLOW_DISSECTOR_KEY_ENC_CONTROL
,
332 if (dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_ENC_PORTS
)) {
333 struct flow_dissector_key_ports
*key
=
334 skb_flow_dissector_target(f
->dissector
,
335 FLOW_DISSECTOR_KEY_ENC_PORTS
,
337 struct flow_dissector_key_ports
*mask
=
338 skb_flow_dissector_target(f
->dissector
,
339 FLOW_DISSECTOR_KEY_ENC_PORTS
,
341 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
342 struct net_device
*up_dev
= mlx5_eswitch_get_uplink_netdev(esw
);
343 struct mlx5e_priv
*up_priv
= netdev_priv(up_dev
);
345 /* Full udp dst port must be given */
346 if (memchr_inv(&mask
->dst
, 0xff, sizeof(mask
->dst
)))
347 goto vxlan_match_offload_err
;
349 if (mlx5e_vxlan_lookup_port(up_priv
, be16_to_cpu(key
->dst
)) &&
350 MLX5_CAP_ESW(priv
->mdev
, vxlan_encap_decap
))
351 parse_vxlan_attr(spec
, f
);
353 netdev_warn(priv
->netdev
,
354 "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key
->dst
));
358 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
359 udp_dport
, ntohs(mask
->dst
));
360 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
361 udp_dport
, ntohs(key
->dst
));
363 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
364 udp_sport
, ntohs(mask
->src
));
365 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
366 udp_sport
, ntohs(key
->src
));
367 } else { /* udp dst port must be given */
368 vxlan_match_offload_err
:
369 netdev_warn(priv
->netdev
,
370 "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
374 if (enc_control
->addr_type
== FLOW_DISSECTOR_KEY_IPV4_ADDRS
) {
375 struct flow_dissector_key_ipv4_addrs
*key
=
376 skb_flow_dissector_target(f
->dissector
,
377 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS
,
379 struct flow_dissector_key_ipv4_addrs
*mask
=
380 skb_flow_dissector_target(f
->dissector
,
381 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS
,
383 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
384 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
,
386 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
387 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
,
390 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
391 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
,
393 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
394 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
,
397 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4
, headers_c
, ethertype
);
398 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ethertype
, ETH_P_IP
);
399 } else if (enc_control
->addr_type
== FLOW_DISSECTOR_KEY_IPV6_ADDRS
) {
400 struct flow_dissector_key_ipv6_addrs
*key
=
401 skb_flow_dissector_target(f
->dissector
,
402 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS
,
404 struct flow_dissector_key_ipv6_addrs
*mask
=
405 skb_flow_dissector_target(f
->dissector
,
406 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS
,
409 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
410 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
),
411 &mask
->src
, MLX5_FLD_SZ_BYTES(ipv6_layout
, ipv6
));
412 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
413 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
),
414 &key
->src
, MLX5_FLD_SZ_BYTES(ipv6_layout
, ipv6
));
416 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
417 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
),
418 &mask
->dst
, MLX5_FLD_SZ_BYTES(ipv6_layout
, ipv6
));
419 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
420 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
),
421 &key
->dst
, MLX5_FLD_SZ_BYTES(ipv6_layout
, ipv6
));
423 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4
, headers_c
, ethertype
);
424 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ethertype
, ETH_P_IPV6
);
427 /* Enforce DMAC when offloading incoming tunneled flows.
428 * Flow counters require a match on the DMAC.
430 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4
, headers_c
, dmac_47_16
);
431 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4
, headers_c
, dmac_15_0
);
432 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
433 dmac_47_16
), priv
->netdev
->dev_addr
);
435 /* let software handle IP fragments */
436 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, frag
, 1);
437 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, frag
, 0);
442 static int __parse_cls_flower(struct mlx5e_priv
*priv
,
443 struct mlx5_flow_spec
*spec
,
444 struct tc_cls_flower_offload
*f
,
447 void *headers_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
449 void *headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
454 *min_inline
= MLX5_INLINE_MODE_L2
;
456 if (f
->dissector
->used_keys
&
457 ~(BIT(FLOW_DISSECTOR_KEY_CONTROL
) |
458 BIT(FLOW_DISSECTOR_KEY_BASIC
) |
459 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS
) |
460 BIT(FLOW_DISSECTOR_KEY_VLAN
) |
461 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS
) |
462 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS
) |
463 BIT(FLOW_DISSECTOR_KEY_PORTS
) |
464 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID
) |
465 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS
) |
466 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS
) |
467 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS
) |
468 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL
))) {
469 netdev_warn(priv
->netdev
, "Unsupported key used: 0x%x\n",
470 f
->dissector
->used_keys
);
474 if ((dissector_uses_key(f
->dissector
,
475 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS
) ||
476 dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_ENC_KEYID
) ||
477 dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_ENC_PORTS
)) &&
478 dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_ENC_CONTROL
)) {
479 struct flow_dissector_key_control
*key
=
480 skb_flow_dissector_target(f
->dissector
,
481 FLOW_DISSECTOR_KEY_ENC_CONTROL
,
483 switch (key
->addr_type
) {
484 case FLOW_DISSECTOR_KEY_IPV4_ADDRS
:
485 case FLOW_DISSECTOR_KEY_IPV6_ADDRS
:
486 if (parse_tunnel_attr(priv
, spec
, f
))
493 /* In decap flow, header pointers should point to the inner
494 * headers, outer header were already set by parse_tunnel_attr
496 headers_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
498 headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
502 if (dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_CONTROL
)) {
503 struct flow_dissector_key_control
*key
=
504 skb_flow_dissector_target(f
->dissector
,
505 FLOW_DISSECTOR_KEY_CONTROL
,
508 struct flow_dissector_key_control
*mask
=
509 skb_flow_dissector_target(f
->dissector
,
510 FLOW_DISSECTOR_KEY_CONTROL
,
512 addr_type
= key
->addr_type
;
514 if (mask
->flags
& FLOW_DIS_IS_FRAGMENT
) {
515 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, frag
, 1);
516 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, frag
,
517 key
->flags
& FLOW_DIS_IS_FRAGMENT
);
519 /* the HW doesn't need L3 inline to match on frag=no */
520 if (key
->flags
& FLOW_DIS_IS_FRAGMENT
)
521 *min_inline
= MLX5_INLINE_MODE_IP
;
525 if (dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_BASIC
)) {
526 struct flow_dissector_key_basic
*key
=
527 skb_flow_dissector_target(f
->dissector
,
528 FLOW_DISSECTOR_KEY_BASIC
,
530 struct flow_dissector_key_basic
*mask
=
531 skb_flow_dissector_target(f
->dissector
,
532 FLOW_DISSECTOR_KEY_BASIC
,
534 ip_proto
= key
->ip_proto
;
536 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ethertype
,
537 ntohs(mask
->n_proto
));
538 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ethertype
,
539 ntohs(key
->n_proto
));
541 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ip_protocol
,
543 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ip_protocol
,
547 *min_inline
= MLX5_INLINE_MODE_IP
;
550 if (dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_ETH_ADDRS
)) {
551 struct flow_dissector_key_eth_addrs
*key
=
552 skb_flow_dissector_target(f
->dissector
,
553 FLOW_DISSECTOR_KEY_ETH_ADDRS
,
555 struct flow_dissector_key_eth_addrs
*mask
=
556 skb_flow_dissector_target(f
->dissector
,
557 FLOW_DISSECTOR_KEY_ETH_ADDRS
,
560 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
563 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
567 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
570 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
575 if (dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_VLAN
)) {
576 struct flow_dissector_key_vlan
*key
=
577 skb_flow_dissector_target(f
->dissector
,
578 FLOW_DISSECTOR_KEY_VLAN
,
580 struct flow_dissector_key_vlan
*mask
=
581 skb_flow_dissector_target(f
->dissector
,
582 FLOW_DISSECTOR_KEY_VLAN
,
584 if (mask
->vlan_id
|| mask
->vlan_priority
) {
585 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, cvlan_tag
, 1);
586 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, cvlan_tag
, 1);
588 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, first_vid
, mask
->vlan_id
);
589 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, first_vid
, key
->vlan_id
);
591 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, first_prio
, mask
->vlan_priority
);
592 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, first_prio
, key
->vlan_priority
);
596 if (addr_type
== FLOW_DISSECTOR_KEY_IPV4_ADDRS
) {
597 struct flow_dissector_key_ipv4_addrs
*key
=
598 skb_flow_dissector_target(f
->dissector
,
599 FLOW_DISSECTOR_KEY_IPV4_ADDRS
,
601 struct flow_dissector_key_ipv4_addrs
*mask
=
602 skb_flow_dissector_target(f
->dissector
,
603 FLOW_DISSECTOR_KEY_IPV4_ADDRS
,
606 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
607 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
),
608 &mask
->src
, sizeof(mask
->src
));
609 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
610 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
),
611 &key
->src
, sizeof(key
->src
));
612 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
613 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
),
614 &mask
->dst
, sizeof(mask
->dst
));
615 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
616 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
),
617 &key
->dst
, sizeof(key
->dst
));
619 if (mask
->src
|| mask
->dst
)
620 *min_inline
= MLX5_INLINE_MODE_IP
;
623 if (addr_type
== FLOW_DISSECTOR_KEY_IPV6_ADDRS
) {
624 struct flow_dissector_key_ipv6_addrs
*key
=
625 skb_flow_dissector_target(f
->dissector
,
626 FLOW_DISSECTOR_KEY_IPV6_ADDRS
,
628 struct flow_dissector_key_ipv6_addrs
*mask
=
629 skb_flow_dissector_target(f
->dissector
,
630 FLOW_DISSECTOR_KEY_IPV6_ADDRS
,
633 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
634 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
),
635 &mask
->src
, sizeof(mask
->src
));
636 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
637 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
),
638 &key
->src
, sizeof(key
->src
));
640 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
641 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
),
642 &mask
->dst
, sizeof(mask
->dst
));
643 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
644 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
),
645 &key
->dst
, sizeof(key
->dst
));
647 if (ipv6_addr_type(&mask
->src
) != IPV6_ADDR_ANY
||
648 ipv6_addr_type(&mask
->dst
) != IPV6_ADDR_ANY
)
649 *min_inline
= MLX5_INLINE_MODE_IP
;
652 if (dissector_uses_key(f
->dissector
, FLOW_DISSECTOR_KEY_PORTS
)) {
653 struct flow_dissector_key_ports
*key
=
654 skb_flow_dissector_target(f
->dissector
,
655 FLOW_DISSECTOR_KEY_PORTS
,
657 struct flow_dissector_key_ports
*mask
=
658 skb_flow_dissector_target(f
->dissector
,
659 FLOW_DISSECTOR_KEY_PORTS
,
663 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
664 tcp_sport
, ntohs(mask
->src
));
665 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
666 tcp_sport
, ntohs(key
->src
));
668 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
669 tcp_dport
, ntohs(mask
->dst
));
670 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
671 tcp_dport
, ntohs(key
->dst
));
675 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
676 udp_sport
, ntohs(mask
->src
));
677 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
678 udp_sport
, ntohs(key
->src
));
680 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
681 udp_dport
, ntohs(mask
->dst
));
682 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
683 udp_dport
, ntohs(key
->dst
));
686 netdev_err(priv
->netdev
,
687 "Only UDP and TCP transport are supported\n");
691 if (mask
->src
|| mask
->dst
)
692 *min_inline
= MLX5_INLINE_MODE_TCP_UDP
;
698 static int parse_cls_flower(struct mlx5e_priv
*priv
,
699 struct mlx5e_tc_flow
*flow
,
700 struct mlx5_flow_spec
*spec
,
701 struct tc_cls_flower_offload
*f
)
703 struct mlx5_core_dev
*dev
= priv
->mdev
;
704 struct mlx5_eswitch
*esw
= dev
->priv
.eswitch
;
705 struct mlx5_eswitch_rep
*rep
= priv
->ppriv
;
709 err
= __parse_cls_flower(priv
, spec
, f
, &min_inline
);
711 if (!err
&& (flow
->flags
& MLX5E_TC_FLOW_ESWITCH
) &&
712 rep
->vport
!= FDB_UPLINK_VPORT
) {
713 if (min_inline
> esw
->offloads
.inline_mode
) {
714 netdev_warn(priv
->netdev
,
715 "Flow is not offloaded due to min inline setting, required %d actual %d\n",
716 min_inline
, esw
->offloads
.inline_mode
);
724 struct pedit_headers
{
732 static int pedit_header_offsets
[] = {
733 [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH
] = offsetof(struct pedit_headers
, eth
),
734 [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4
] = offsetof(struct pedit_headers
, ip4
),
735 [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
] = offsetof(struct pedit_headers
, ip6
),
736 [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP
] = offsetof(struct pedit_headers
, tcp
),
737 [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP
] = offsetof(struct pedit_headers
, udp
),
740 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
742 static int set_pedit_val(u8 hdr_type
, u32 mask
, u32 val
, u32 offset
,
743 struct pedit_headers
*masks
,
744 struct pedit_headers
*vals
)
746 u32
*curr_pmask
, *curr_pval
;
748 if (hdr_type
>= __PEDIT_HDR_TYPE_MAX
)
751 curr_pmask
= (u32
*)(pedit_header(masks
, hdr_type
) + offset
);
752 curr_pval
= (u32
*)(pedit_header(vals
, hdr_type
) + offset
);
754 if (*curr_pmask
& mask
) /* disallow acting twice on the same location */
758 *curr_pval
|= (val
& mask
);
772 static struct mlx5_fields fields
[] = {
773 {MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16
, 4, offsetof(struct pedit_headers
, eth
.h_dest
[0])},
774 {MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0
, 2, offsetof(struct pedit_headers
, eth
.h_dest
[4])},
775 {MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16
, 4, offsetof(struct pedit_headers
, eth
.h_source
[0])},
776 {MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0
, 2, offsetof(struct pedit_headers
, eth
.h_source
[4])},
777 {MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE
, 2, offsetof(struct pedit_headers
, eth
.h_proto
)},
779 {MLX5_ACTION_IN_FIELD_OUT_IP_DSCP
, 1, offsetof(struct pedit_headers
, ip4
.tos
)},
780 {MLX5_ACTION_IN_FIELD_OUT_IP_TTL
, 1, offsetof(struct pedit_headers
, ip4
.ttl
)},
781 {MLX5_ACTION_IN_FIELD_OUT_SIPV4
, 4, offsetof(struct pedit_headers
, ip4
.saddr
)},
782 {MLX5_ACTION_IN_FIELD_OUT_DIPV4
, 4, offsetof(struct pedit_headers
, ip4
.daddr
)},
784 {MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96
, 4, offsetof(struct pedit_headers
, ip6
.saddr
.s6_addr32
[0])},
785 {MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64
, 4, offsetof(struct pedit_headers
, ip6
.saddr
.s6_addr32
[1])},
786 {MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32
, 4, offsetof(struct pedit_headers
, ip6
.saddr
.s6_addr32
[2])},
787 {MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0
, 4, offsetof(struct pedit_headers
, ip6
.saddr
.s6_addr32
[3])},
788 {MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96
, 4, offsetof(struct pedit_headers
, ip6
.daddr
.s6_addr32
[0])},
789 {MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64
, 4, offsetof(struct pedit_headers
, ip6
.daddr
.s6_addr32
[1])},
790 {MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32
, 4, offsetof(struct pedit_headers
, ip6
.daddr
.s6_addr32
[2])},
791 {MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0
, 4, offsetof(struct pedit_headers
, ip6
.daddr
.s6_addr32
[3])},
793 {MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT
, 2, offsetof(struct pedit_headers
, tcp
.source
)},
794 {MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT
, 2, offsetof(struct pedit_headers
, tcp
.dest
)},
795 {MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS
, 1, offsetof(struct pedit_headers
, tcp
.ack_seq
) + 5},
797 {MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT
, 2, offsetof(struct pedit_headers
, udp
.source
)},
798 {MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT
, 2, offsetof(struct pedit_headers
, udp
.dest
)},
801 /* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at
802 * max from the SW pedit action. On success, it says how many HW actions were
805 static int offload_pedit_fields(struct pedit_headers
*masks
,
806 struct pedit_headers
*vals
,
807 struct mlx5e_tc_flow_parse_attr
*parse_attr
)
809 struct pedit_headers
*set_masks
, *add_masks
, *set_vals
, *add_vals
;
810 int i
, action_size
, nactions
, max_actions
, first
, last
;
811 void *s_masks_p
, *a_masks_p
, *vals_p
;
812 u32 s_mask
, a_mask
, val
;
813 struct mlx5_fields
*f
;
818 set_masks
= &masks
[TCA_PEDIT_KEY_EX_CMD_SET
];
819 add_masks
= &masks
[TCA_PEDIT_KEY_EX_CMD_ADD
];
820 set_vals
= &vals
[TCA_PEDIT_KEY_EX_CMD_SET
];
821 add_vals
= &vals
[TCA_PEDIT_KEY_EX_CMD_ADD
];
823 action_size
= MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto
);
824 action
= parse_attr
->mod_hdr_actions
;
825 max_actions
= parse_attr
->num_mod_hdr_actions
;
828 for (i
= 0; i
< ARRAY_SIZE(fields
); i
++) {
830 /* avoid seeing bits set from previous iterations */
831 s_mask
= a_mask
= mask
= val
= 0;
833 s_masks_p
= (void *)set_masks
+ f
->offset
;
834 a_masks_p
= (void *)add_masks
+ f
->offset
;
836 memcpy(&s_mask
, s_masks_p
, f
->size
);
837 memcpy(&a_mask
, a_masks_p
, f
->size
);
839 if (!s_mask
&& !a_mask
) /* nothing to offload here */
842 if (s_mask
&& a_mask
) {
843 printk(KERN_WARNING
"mlx5: can't set and add to the same HW field (%x)\n", f
->field
);
847 if (nactions
== max_actions
) {
848 printk(KERN_WARNING
"mlx5: parsed %d pedit actions, can't do more\n", nactions
);
853 cmd
= MLX5_ACTION_TYPE_SET
;
855 vals_p
= (void *)set_vals
+ f
->offset
;
856 /* clear to denote we consumed this field */
857 memset(s_masks_p
, 0, f
->size
);
859 cmd
= MLX5_ACTION_TYPE_ADD
;
861 vals_p
= (void *)add_vals
+ f
->offset
;
862 /* clear to denote we consumed this field */
863 memset(a_masks_p
, 0, f
->size
);
866 memcpy(&val
, vals_p
, f
->size
);
868 field_bsize
= f
->size
* BITS_PER_BYTE
;
869 first
= find_first_bit(&mask
, field_bsize
);
870 last
= find_last_bit(&mask
, field_bsize
);
871 if (first
> 0 || last
!= (field_bsize
- 1)) {
872 printk(KERN_WARNING
"mlx5: partial rewrite (mask %lx) is currently not offloaded\n",
877 MLX5_SET(set_action_in
, action
, action_type
, cmd
);
878 MLX5_SET(set_action_in
, action
, field
, f
->field
);
880 if (cmd
== MLX5_ACTION_TYPE_SET
) {
881 MLX5_SET(set_action_in
, action
, offset
, 0);
882 /* length is num of bits to be written, zero means length of 32 */
883 MLX5_SET(set_action_in
, action
, length
, field_bsize
);
886 if (field_bsize
== 32)
887 MLX5_SET(set_action_in
, action
, data
, ntohl(val
));
888 else if (field_bsize
== 16)
889 MLX5_SET(set_action_in
, action
, data
, ntohs(val
));
890 else if (field_bsize
== 8)
891 MLX5_SET(set_action_in
, action
, data
, val
);
893 action
+= action_size
;
897 parse_attr
->num_mod_hdr_actions
= nactions
;
901 static int alloc_mod_hdr_actions(struct mlx5e_priv
*priv
,
902 const struct tc_action
*a
, int namespace,
903 struct mlx5e_tc_flow_parse_attr
*parse_attr
)
905 int nkeys
, action_size
, max_actions
;
907 nkeys
= tcf_pedit_nkeys(a
);
908 action_size
= MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto
);
910 if (namespace == MLX5_FLOW_NAMESPACE_FDB
) /* FDB offloading */
911 max_actions
= MLX5_CAP_ESW_FLOWTABLE_FDB(priv
->mdev
, max_modify_header_actions
);
912 else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
913 max_actions
= MLX5_CAP_FLOWTABLE_NIC_RX(priv
->mdev
, max_modify_header_actions
);
915 /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */
916 max_actions
= min(max_actions
, nkeys
* 16);
918 parse_attr
->mod_hdr_actions
= kcalloc(max_actions
, action_size
, GFP_KERNEL
);
919 if (!parse_attr
->mod_hdr_actions
)
922 parse_attr
->num_mod_hdr_actions
= max_actions
;
926 static const struct pedit_headers zero_masks
= {};
928 static int parse_tc_pedit_action(struct mlx5e_priv
*priv
,
929 const struct tc_action
*a
, int namespace,
930 struct mlx5e_tc_flow_parse_attr
*parse_attr
)
932 struct pedit_headers masks
[__PEDIT_CMD_MAX
], vals
[__PEDIT_CMD_MAX
], *cmd_masks
;
933 int nkeys
, i
, err
= -EOPNOTSUPP
;
934 u32 mask
, val
, offset
;
937 nkeys
= tcf_pedit_nkeys(a
);
939 memset(masks
, 0, sizeof(struct pedit_headers
) * __PEDIT_CMD_MAX
);
940 memset(vals
, 0, sizeof(struct pedit_headers
) * __PEDIT_CMD_MAX
);
942 for (i
= 0; i
< nkeys
; i
++) {
943 htype
= tcf_pedit_htype(a
, i
);
944 cmd
= tcf_pedit_cmd(a
, i
);
945 err
= -EOPNOTSUPP
; /* can't be all optimistic */
947 if (htype
== TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK
) {
948 printk(KERN_WARNING
"mlx5: legacy pedit isn't offloaded\n");
952 if (cmd
!= TCA_PEDIT_KEY_EX_CMD_SET
&& cmd
!= TCA_PEDIT_KEY_EX_CMD_ADD
) {
953 printk(KERN_WARNING
"mlx5: pedit cmd %d isn't offloaded\n", cmd
);
957 mask
= tcf_pedit_mask(a
, i
);
958 val
= tcf_pedit_val(a
, i
);
959 offset
= tcf_pedit_offset(a
, i
);
961 err
= set_pedit_val(htype
, ~mask
, val
, offset
, &masks
[cmd
], &vals
[cmd
]);
966 err
= alloc_mod_hdr_actions(priv
, a
, namespace, parse_attr
);
970 err
= offload_pedit_fields(masks
, vals
, parse_attr
);
972 goto out_dealloc_parsed_actions
;
974 for (cmd
= 0; cmd
< __PEDIT_CMD_MAX
; cmd
++) {
975 cmd_masks
= &masks
[cmd
];
976 if (memcmp(cmd_masks
, &zero_masks
, sizeof(zero_masks
))) {
977 printk(KERN_WARNING
"mlx5: attempt to offload an unsupported field (cmd %d)\n",
979 print_hex_dump(KERN_WARNING
, "mask: ", DUMP_PREFIX_ADDRESS
,
980 16, 1, cmd_masks
, sizeof(zero_masks
), true);
982 goto out_dealloc_parsed_actions
;
988 out_dealloc_parsed_actions
:
989 kfree(parse_attr
->mod_hdr_actions
);
994 static int parse_tc_nic_actions(struct mlx5e_priv
*priv
, struct tcf_exts
*exts
,
995 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
996 struct mlx5e_tc_flow
*flow
)
998 struct mlx5_nic_flow_attr
*attr
= flow
->nic_attr
;
999 const struct tc_action
*a
;
1003 if (tc_no_actions(exts
))
1006 attr
->flow_tag
= MLX5_FS_DEFAULT_FLOW_TAG
;
1009 tcf_exts_to_list(exts
, &actions
);
1010 list_for_each_entry(a
, &actions
, list
) {
1011 /* Only support a single action per rule */
1015 if (is_tcf_gact_shot(a
)) {
1016 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_DROP
;
1017 if (MLX5_CAP_FLOWTABLE(priv
->mdev
,
1018 flow_table_properties_nic_receive
.flow_counter
))
1019 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_COUNT
;
1023 if (is_tcf_pedit(a
)) {
1024 err
= parse_tc_pedit_action(priv
, a
, MLX5_FLOW_NAMESPACE_KERNEL
,
1029 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
|
1030 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
1034 if (is_tcf_skbedit_mark(a
)) {
1035 u32 mark
= tcf_skbedit_mark(a
);
1037 if (mark
& ~MLX5E_TC_FLOW_ID_MASK
) {
1038 netdev_warn(priv
->netdev
, "Bad flow mark - only 16 bit is supported: 0x%x\n",
1043 attr
->flow_tag
= mark
;
1044 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
1054 static inline int cmp_encap_info(struct ip_tunnel_key
*a
,
1055 struct ip_tunnel_key
*b
)
1057 return memcmp(a
, b
, sizeof(*a
));
1060 static inline int hash_encap_info(struct ip_tunnel_key
*key
)
1062 return jhash(key
, sizeof(*key
), 0);
1065 static int mlx5e_route_lookup_ipv4(struct mlx5e_priv
*priv
,
1066 struct net_device
*mirred_dev
,
1067 struct net_device
**out_dev
,
1069 struct neighbour
**out_n
,
1072 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1074 struct neighbour
*n
= NULL
;
1076 #if IS_ENABLED(CONFIG_INET)
1079 rt
= ip_route_output_key(dev_net(mirred_dev
), fl4
);
1080 ret
= PTR_ERR_OR_ZERO(rt
);
1086 /* if the egress device isn't on the same HW e-switch, we use the uplink */
1087 if (!switchdev_port_same_parent_id(priv
->netdev
, rt
->dst
.dev
))
1088 *out_dev
= mlx5_eswitch_get_uplink_netdev(esw
);
1090 *out_dev
= rt
->dst
.dev
;
1092 *out_ttl
= ip4_dst_hoplimit(&rt
->dst
);
1093 n
= dst_neigh_lookup(&rt
->dst
, &fl4
->daddr
);
1102 static int mlx5e_route_lookup_ipv6(struct mlx5e_priv
*priv
,
1103 struct net_device
*mirred_dev
,
1104 struct net_device
**out_dev
,
1106 struct neighbour
**out_n
,
1109 struct neighbour
*n
= NULL
;
1110 struct dst_entry
*dst
;
1112 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
1113 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1116 dst
= ip6_route_output(dev_net(mirred_dev
), NULL
, fl6
);
1123 *out_ttl
= ip6_dst_hoplimit(dst
);
1125 /* if the egress device isn't on the same HW e-switch, we use the uplink */
1126 if (!switchdev_port_same_parent_id(priv
->netdev
, dst
->dev
))
1127 *out_dev
= mlx5_eswitch_get_uplink_netdev(esw
);
1129 *out_dev
= dst
->dev
;
1134 n
= dst_neigh_lookup(dst
, &fl6
->daddr
);
1143 static int gen_vxlan_header_ipv4(struct net_device
*out_dev
,
1145 unsigned char h_dest
[ETH_ALEN
],
1149 __be16 udp_dst_port
,
1152 int encap_size
= VXLAN_HLEN
+ sizeof(struct iphdr
) + ETH_HLEN
;
1153 struct ethhdr
*eth
= (struct ethhdr
*)buf
;
1154 struct iphdr
*ip
= (struct iphdr
*)((char *)eth
+ sizeof(struct ethhdr
));
1155 struct udphdr
*udp
= (struct udphdr
*)((char *)ip
+ sizeof(struct iphdr
));
1156 struct vxlanhdr
*vxh
= (struct vxlanhdr
*)((char *)udp
+ sizeof(struct udphdr
));
1158 memset(buf
, 0, encap_size
);
1160 ether_addr_copy(eth
->h_dest
, h_dest
);
1161 ether_addr_copy(eth
->h_source
, out_dev
->dev_addr
);
1162 eth
->h_proto
= htons(ETH_P_IP
);
1168 ip
->protocol
= IPPROTO_UDP
;
1172 udp
->dest
= udp_dst_port
;
1173 vxh
->vx_flags
= VXLAN_HF_VNI
;
1174 vxh
->vx_vni
= vxlan_vni_field(vx_vni
);
1179 static int gen_vxlan_header_ipv6(struct net_device
*out_dev
,
1181 unsigned char h_dest
[ETH_ALEN
],
1183 struct in6_addr
*daddr
,
1184 struct in6_addr
*saddr
,
1185 __be16 udp_dst_port
,
1188 int encap_size
= VXLAN_HLEN
+ sizeof(struct ipv6hdr
) + ETH_HLEN
;
1189 struct ethhdr
*eth
= (struct ethhdr
*)buf
;
1190 struct ipv6hdr
*ip6h
= (struct ipv6hdr
*)((char *)eth
+ sizeof(struct ethhdr
));
1191 struct udphdr
*udp
= (struct udphdr
*)((char *)ip6h
+ sizeof(struct ipv6hdr
));
1192 struct vxlanhdr
*vxh
= (struct vxlanhdr
*)((char *)udp
+ sizeof(struct udphdr
));
1194 memset(buf
, 0, encap_size
);
1196 ether_addr_copy(eth
->h_dest
, h_dest
);
1197 ether_addr_copy(eth
->h_source
, out_dev
->dev_addr
);
1198 eth
->h_proto
= htons(ETH_P_IPV6
);
1200 ip6_flow_hdr(ip6h
, 0, 0);
1201 /* the HW fills up ipv6 payload len */
1202 ip6h
->nexthdr
= IPPROTO_UDP
;
1203 ip6h
->hop_limit
= ttl
;
1204 ip6h
->daddr
= *daddr
;
1205 ip6h
->saddr
= *saddr
;
1207 udp
->dest
= udp_dst_port
;
1208 vxh
->vx_flags
= VXLAN_HF_VNI
;
1209 vxh
->vx_vni
= vxlan_vni_field(vx_vni
);
1214 static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv
*priv
,
1215 struct net_device
*mirred_dev
,
1216 struct mlx5_encap_entry
*e
,
1217 struct net_device
**out_dev
)
1219 int max_encap_size
= MLX5_CAP_ESW(priv
->mdev
, max_encap_header_size
);
1220 struct ip_tunnel_key
*tun_key
= &e
->tun_info
.key
;
1221 int encap_size
, ttl
, err
;
1222 struct neighbour
*n
= NULL
;
1223 struct flowi4 fl4
= {};
1226 encap_header
= kzalloc(max_encap_size
, GFP_KERNEL
);
1230 switch (e
->tunnel_type
) {
1231 case MLX5_HEADER_TYPE_VXLAN
:
1232 fl4
.flowi4_proto
= IPPROTO_UDP
;
1233 fl4
.fl4_dport
= tun_key
->tp_dst
;
1239 fl4
.flowi4_tos
= tun_key
->tos
;
1240 fl4
.daddr
= tun_key
->u
.ipv4
.dst
;
1241 fl4
.saddr
= tun_key
->u
.ipv4
.src
;
1243 err
= mlx5e_route_lookup_ipv4(priv
, mirred_dev
, out_dev
,
1248 if (!(n
->nud_state
& NUD_VALID
)) {
1249 pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__
, &fl4
.daddr
);
1255 e
->out_dev
= *out_dev
;
1257 neigh_ha_snapshot(e
->h_dest
, n
, *out_dev
);
1259 switch (e
->tunnel_type
) {
1260 case MLX5_HEADER_TYPE_VXLAN
:
1261 encap_size
= gen_vxlan_header_ipv4(*out_dev
, encap_header
,
1264 fl4
.saddr
, tun_key
->tp_dst
,
1265 tunnel_id_to_key32(tun_key
->tun_id
));
1272 err
= mlx5_encap_alloc(priv
->mdev
, e
->tunnel_type
,
1273 encap_size
, encap_header
, &e
->encap_id
);
1277 kfree(encap_header
);
1281 static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv
*priv
,
1282 struct net_device
*mirred_dev
,
1283 struct mlx5_encap_entry
*e
,
1284 struct net_device
**out_dev
)
1287 int max_encap_size
= MLX5_CAP_ESW(priv
->mdev
, max_encap_header_size
);
1288 struct ip_tunnel_key
*tun_key
= &e
->tun_info
.key
;
1289 int encap_size
, err
, ttl
= 0;
1290 struct neighbour
*n
= NULL
;
1291 struct flowi6 fl6
= {};
1294 encap_header
= kzalloc(max_encap_size
, GFP_KERNEL
);
1298 switch (e
->tunnel_type
) {
1299 case MLX5_HEADER_TYPE_VXLAN
:
1300 fl6
.flowi6_proto
= IPPROTO_UDP
;
1301 fl6
.fl6_dport
= tun_key
->tp_dst
;
1308 fl6
.flowlabel
= ip6_make_flowinfo(RT_TOS(tun_key
->tos
), tun_key
->label
);
1309 fl6
.daddr
= tun_key
->u
.ipv6
.dst
;
1310 fl6
.saddr
= tun_key
->u
.ipv6
.src
;
1312 err
= mlx5e_route_lookup_ipv6(priv
, mirred_dev
, out_dev
,
1317 if (!(n
->nud_state
& NUD_VALID
)) {
1318 pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__
, &fl6
.daddr
);
1324 e
->out_dev
= *out_dev
;
1326 neigh_ha_snapshot(e
->h_dest
, n
, *out_dev
);
1328 switch (e
->tunnel_type
) {
1329 case MLX5_HEADER_TYPE_VXLAN
:
1330 encap_size
= gen_vxlan_header_ipv6(*out_dev
, encap_header
,
1333 &fl6
.saddr
, tun_key
->tp_dst
,
1334 tunnel_id_to_key32(tun_key
->tun_id
));
1341 err
= mlx5_encap_alloc(priv
->mdev
, e
->tunnel_type
,
1342 encap_size
, encap_header
, &e
->encap_id
);
1346 kfree(encap_header
);
1350 static int mlx5e_attach_encap(struct mlx5e_priv
*priv
,
1351 struct ip_tunnel_info
*tun_info
,
1352 struct net_device
*mirred_dev
,
1353 struct mlx5_esw_flow_attr
*attr
)
1355 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1356 struct net_device
*up_dev
= mlx5_eswitch_get_uplink_netdev(esw
);
1357 struct mlx5e_priv
*up_priv
= netdev_priv(up_dev
);
1358 unsigned short family
= ip_tunnel_info_af(tun_info
);
1359 struct ip_tunnel_key
*key
= &tun_info
->key
;
1360 struct mlx5_encap_entry
*e
;
1361 struct net_device
*out_dev
;
1362 int tunnel_type
, err
= -EOPNOTSUPP
;
1366 /* udp dst port must be set */
1367 if (!memchr_inv(&key
->tp_dst
, 0, sizeof(key
->tp_dst
)))
1368 goto vxlan_encap_offload_err
;
1370 /* setting udp src port isn't supported */
1371 if (memchr_inv(&key
->tp_src
, 0, sizeof(key
->tp_src
))) {
1372 vxlan_encap_offload_err
:
1373 netdev_warn(priv
->netdev
,
1374 "must set udp dst port and not set udp src port\n");
1378 if (mlx5e_vxlan_lookup_port(up_priv
, be16_to_cpu(key
->tp_dst
)) &&
1379 MLX5_CAP_ESW(priv
->mdev
, vxlan_encap_decap
)) {
1380 tunnel_type
= MLX5_HEADER_TYPE_VXLAN
;
1382 netdev_warn(priv
->netdev
,
1383 "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key
->tp_dst
));
1387 hash_key
= hash_encap_info(key
);
1389 hash_for_each_possible_rcu(esw
->offloads
.encap_tbl
, e
,
1390 encap_hlist
, hash_key
) {
1391 if (!cmp_encap_info(&e
->tun_info
.key
, key
)) {
1402 e
= kzalloc(sizeof(*e
), GFP_KERNEL
);
1406 e
->tun_info
= *tun_info
;
1407 e
->tunnel_type
= tunnel_type
;
1408 INIT_LIST_HEAD(&e
->flows
);
1410 if (family
== AF_INET
)
1411 err
= mlx5e_create_encap_header_ipv4(priv
, mirred_dev
, e
, &out_dev
);
1412 else if (family
== AF_INET6
)
1413 err
= mlx5e_create_encap_header_ipv6(priv
, mirred_dev
, e
, &out_dev
);
1419 hash_add_rcu(esw
->offloads
.encap_tbl
, &e
->encap_hlist
, hash_key
);
1428 static int parse_tc_fdb_actions(struct mlx5e_priv
*priv
, struct tcf_exts
*exts
,
1429 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
1430 struct mlx5e_tc_flow
*flow
)
1432 struct mlx5_esw_flow_attr
*attr
= flow
->esw_attr
;
1433 struct ip_tunnel_info
*info
= NULL
;
1434 const struct tc_action
*a
;
1439 if (tc_no_actions(exts
))
1442 memset(attr
, 0, sizeof(*attr
));
1443 attr
->in_rep
= priv
->ppriv
;
1445 tcf_exts_to_list(exts
, &actions
);
1446 list_for_each_entry(a
, &actions
, list
) {
1447 if (is_tcf_gact_shot(a
)) {
1448 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_DROP
|
1449 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
1453 if (is_tcf_pedit(a
)) {
1454 err
= parse_tc_pedit_action(priv
, a
, MLX5_FLOW_NAMESPACE_FDB
,
1459 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
;
1463 if (is_tcf_mirred_egress_redirect(a
)) {
1464 int ifindex
= tcf_mirred_ifindex(a
);
1465 struct net_device
*out_dev
;
1466 struct mlx5e_priv
*out_priv
;
1468 out_dev
= __dev_get_by_index(dev_net(priv
->netdev
), ifindex
);
1470 if (switchdev_port_same_parent_id(priv
->netdev
,
1472 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
|
1473 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
1474 out_priv
= netdev_priv(out_dev
);
1475 attr
->out_rep
= out_priv
->ppriv
;
1477 err
= mlx5e_attach_encap(priv
, info
,
1481 list_add(&flow
->encap
, &attr
->encap
->flows
);
1482 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_ENCAP
|
1483 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
|
1484 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
1485 out_priv
= netdev_priv(attr
->encap
->out_dev
);
1486 attr
->out_rep
= out_priv
->ppriv
;
1488 pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
1489 priv
->netdev
->name
, out_dev
->name
);
1495 if (is_tcf_tunnel_set(a
)) {
1496 info
= tcf_tunnel_info(a
);
1504 if (is_tcf_vlan(a
)) {
1505 if (tcf_vlan_action(a
) == TCA_VLAN_ACT_POP
) {
1506 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
;
1507 } else if (tcf_vlan_action(a
) == TCA_VLAN_ACT_PUSH
) {
1508 if (tcf_vlan_push_proto(a
) != htons(ETH_P_8021Q
))
1511 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH
;
1512 attr
->vlan
= tcf_vlan_push_vid(a
);
1513 } else { /* action is TCA_VLAN_ACT_MODIFY */
1519 if (is_tcf_tunnel_release(a
)) {
1520 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_DECAP
;
1529 int mlx5e_configure_flower(struct mlx5e_priv
*priv
, __be16 protocol
,
1530 struct tc_cls_flower_offload
*f
)
1532 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1533 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
1534 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
1535 struct mlx5e_tc_flow
*flow
;
1536 int attr_size
, err
= 0;
1539 if (esw
&& esw
->mode
== SRIOV_OFFLOADS
) {
1540 flow_flags
= MLX5E_TC_FLOW_ESWITCH
;
1541 attr_size
= sizeof(struct mlx5_esw_flow_attr
);
1543 flow_flags
= MLX5E_TC_FLOW_NIC
;
1544 attr_size
= sizeof(struct mlx5_nic_flow_attr
);
1547 flow
= kzalloc(sizeof(*flow
) + attr_size
, GFP_KERNEL
);
1548 parse_attr
= mlx5_vzalloc(sizeof(*parse_attr
));
1549 if (!parse_attr
|| !flow
) {
1554 flow
->cookie
= f
->cookie
;
1555 flow
->flags
= flow_flags
;
1557 err
= parse_cls_flower(priv
, flow
, &parse_attr
->spec
, f
);
1561 if (flow
->flags
& MLX5E_TC_FLOW_ESWITCH
) {
1562 err
= parse_tc_fdb_actions(priv
, f
->exts
, parse_attr
, flow
);
1565 flow
->rule
= mlx5e_tc_add_fdb_flow(priv
, parse_attr
, flow
);
1567 err
= parse_tc_nic_actions(priv
, f
->exts
, parse_attr
, flow
);
1570 flow
->rule
= mlx5e_tc_add_nic_flow(priv
, parse_attr
, flow
);
1573 if (IS_ERR(flow
->rule
)) {
1574 err
= PTR_ERR(flow
->rule
);
1578 err
= rhashtable_insert_fast(&tc
->ht
, &flow
->node
,
1586 mlx5e_tc_del_flow(priv
, flow
);
1595 int mlx5e_delete_flower(struct mlx5e_priv
*priv
,
1596 struct tc_cls_flower_offload
*f
)
1598 struct mlx5e_tc_flow
*flow
;
1599 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
1601 flow
= rhashtable_lookup_fast(&tc
->ht
, &f
->cookie
,
1606 rhashtable_remove_fast(&tc
->ht
, &flow
->node
, tc
->ht_params
);
1608 mlx5e_tc_del_flow(priv
, flow
);
1616 int mlx5e_stats_flower(struct mlx5e_priv
*priv
,
1617 struct tc_cls_flower_offload
*f
)
1619 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
1620 struct mlx5e_tc_flow
*flow
;
1621 struct tc_action
*a
;
1622 struct mlx5_fc
*counter
;
1628 flow
= rhashtable_lookup_fast(&tc
->ht
, &f
->cookie
,
1633 counter
= mlx5_flow_rule_counter(flow
->rule
);
1637 mlx5_fc_query_cached(counter
, &bytes
, &packets
, &lastuse
);
1641 tcf_exts_to_list(f
->exts
, &actions
);
1642 list_for_each_entry(a
, &actions
, list
)
1643 tcf_action_stats_update(a
, bytes
, packets
, lastuse
);
1650 static const struct rhashtable_params mlx5e_tc_flow_ht_params
= {
1651 .head_offset
= offsetof(struct mlx5e_tc_flow
, node
),
1652 .key_offset
= offsetof(struct mlx5e_tc_flow
, cookie
),
1653 .key_len
= sizeof(((struct mlx5e_tc_flow
*)0)->cookie
),
1654 .automatic_shrinking
= true,
1657 int mlx5e_tc_init(struct mlx5e_priv
*priv
)
1659 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
1661 tc
->ht_params
= mlx5e_tc_flow_ht_params
;
1662 return rhashtable_init(&tc
->ht
, &tc
->ht_params
);
/* rhashtable_free_and_destroy() callback: delete the HW rule behind
 * @ptr (a struct mlx5e_tc_flow) and free it. @arg is the mlx5e_priv.
 */
static void _mlx5e_tc_del_flow(void *ptr, void *arg)
{
	struct mlx5e_tc_flow *flow = ptr;
	struct mlx5e_priv *priv = arg;

	mlx5e_tc_del_flow(priv, flow);
	kfree(flow);
}
1674 void mlx5e_tc_cleanup(struct mlx5e_priv
*priv
)
1676 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
1678 rhashtable_free_and_destroy(&tc
->ht
, _mlx5e_tc_del_flow
, priv
);
1680 if (!IS_ERR_OR_NULL(tc
->t
)) {
1681 mlx5_destroy_flow_table(tc
->t
);