/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <net/flow_dissector.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <net/switchdev.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/vxlan.h>
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"
#include "vxlan.h"
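
/* TC flower offload for mlx5e: flower matches are translated into mlx5
 * flow-table match specs and TC actions into flow rules, installed either
 * on the NIC RX tables or, in switchdev mode, on the e-switch FDB.
 */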
enum {
	MLX5E_TC_FLOW_ESWITCH	= BIT(0),
};

struct mlx5e_tc_flow {
	struct rhash_head	node;
	u64			cookie;
	u8			flags;
	struct mlx5_flow_handle *rule;
	struct list_head	encap; /* flows sharing the same encap */
	struct mlx5_esw_flow_attr *attr;
};

enum {
	MLX5_HEADER_TYPE_VXLAN = 0x0,
	MLX5_HEADER_TYPE_NVGRE = 0x1,
};
#define MLX5E_TC_TABLE_NUM_ENTRIES 1024
#define MLX5E_TC_TABLE_NUM_GROUPS 4
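
/* Install a flower rule on the NIC RX flow tables; the TC flow table is
 * created lazily on first use and torn down again in mlx5e_tc_del_flow()
 * once the last filter is gone.
 */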
static struct mlx5_flow_handle *
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
		      struct mlx5_flow_spec *spec,
		      u32 action, u32 flow_tag)
{
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_flow_destination dest = { 0 };
	struct mlx5_flow_act flow_act = {
		.action = action,
		.flow_tag = flow_tag,
		.encap_id = 0,
	};
	struct mlx5_fc *counter = NULL;
	struct mlx5_flow_handle *rule;
	bool table_created = false;

	if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
		dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest.ft = priv->fs.vlan.ft.t;
	} else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		counter = mlx5_fc_create(dev, true);
		if (IS_ERR(counter))
			return ERR_CAST(counter);

		dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
		dest.counter = counter;
	}

	if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
		priv->fs.tc.t =
			mlx5_create_auto_grouped_flow_table(priv->fs.ns,
							    MLX5E_TC_PRIO,
							    MLX5E_TC_TABLE_NUM_ENTRIES,
							    MLX5E_TC_TABLE_NUM_GROUPS,
							    0, 0);
		if (IS_ERR(priv->fs.tc.t)) {
			netdev_err(priv->netdev,
				   "Failed to create tc offload table\n");
			rule = ERR_CAST(priv->fs.tc.t);
			goto err_create_ft;
		}

		table_created = true;
	}

	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
	rule = mlx5_add_flow_rules(priv->fs.tc.t, spec, &flow_act, &dest, 1);

	if (IS_ERR(rule))
		goto err_add_rule;

	return rule;

err_add_rule:
	if (table_created) {
		mlx5_destroy_flow_table(priv->fs.tc.t);
		priv->fs.tc.t = NULL;
	}
err_create_ft:
	mlx5_fc_destroy(dev, counter);

	return rule;
}
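
/* Install a flower rule on the e-switch FDB; any VLAN action is applied
 * before the offloaded rule itself is added.
 */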
static struct mlx5_flow_handle *
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
		      struct mlx5_flow_spec *spec,
		      struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	int err;

	err = mlx5_eswitch_add_vlan_action(esw, attr);
	if (err)
		return ERR_PTR(err);

	return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
}
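
/* Unlink a flow from its encap entry; the last flow to leave releases the
 * hardware encap id and frees the entry.
 */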
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
			       struct mlx5e_tc_flow *flow)
{
	struct list_head *next = flow->encap.next;

	list_del(&flow->encap);
	if (list_empty(next)) {
		struct mlx5_encap_entry *e;

		e = list_entry(next, struct mlx5_encap_entry, flows);
		if (e->n) {
			mlx5_encap_dealloc(priv->mdev, e->encap_id);
			neigh_release(e->n);
		}
		hlist_del_rcu(&e->encap_hlist);
		kfree(e);
	}
}
/* we get here also when setting rule to the FW failed, etc. It means that the
 * flow rule itself might not exist, but some offloading related to the actions
 * should be cleaned.
 */
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_fc *counter = NULL;

	if (!IS_ERR(flow->rule)) {
		counter = mlx5_flow_rule_counter(flow->rule);
		mlx5_del_flow_rules(flow->rule);
		mlx5_fc_destroy(priv->mdev, counter);
	}

	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
		mlx5_eswitch_del_vlan_action(esw, flow->attr);
		if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
			mlx5e_detach_encap(priv, flow);
	}

	if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
		mlx5_destroy_flow_table(priv->fs.tc.t);
		priv->fs.tc.t = NULL;
	}
}
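
/* Fill the match spec for the VXLAN part of a tunnel match: UDP as the IP
 * protocol and, when a tunnel key id is given, the VXLAN VNI.
 */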
static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
			     struct tc_cls_flower_offload *f)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				    misc_parameters);
	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				    misc_parameters);

	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
		struct flow_dissector_key_keyid *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_KEYID,
						  f->key);
		struct flow_dissector_key_keyid *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_KEYID,
						  f->mask);
		MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
			 be32_to_cpu(mask->keyid));
		MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
			 be32_to_cpu(key->keyid));
	}
}
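
/* Parse the tunnel (outer header) part of the match. Only VXLAN over a UDP
 * dst port known to the driver's vxlan table is accepted for decap offload.
 */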
static int parse_tunnel_attr(struct mlx5e_priv *priv,
			     struct mlx5_flow_spec *spec,
			     struct tc_cls_flower_offload *f)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);

	struct flow_dissector_key_control *enc_control =
		skb_flow_dissector_target(f->dissector,
					  FLOW_DISSECTOR_KEY_ENC_CONTROL,
					  f->key);

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
		struct flow_dissector_key_ports *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_PORTS,
						  f->key);
		struct flow_dissector_key_ports *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_PORTS,
						  f->mask);

		/* Full udp dst port must be given */
		if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
			goto vxlan_match_offload_err;

		if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) &&
		    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
			parse_vxlan_attr(spec, f);
		else {
			netdev_warn(priv->netdev,
				    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
			return -EOPNOTSUPP;
		}

		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 udp_dport, ntohs(mask->dst));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 udp_dport, ntohs(key->dst));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 udp_sport, ntohs(mask->src));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 udp_sport, ntohs(key->src));
	} else { /* udp dst port must be given */
vxlan_match_offload_err:
		netdev_warn(priv->netdev,
			    "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
		return -EOPNOTSUPP;
	}

	if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_dissector_key_ipv4_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv4_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
						  f->mask);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 src_ipv4_src_ipv6.ipv4_layout.ipv4,
			 ntohl(mask->src));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 src_ipv4_src_ipv6.ipv4_layout.ipv4,
			 ntohl(key->src));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(mask->dst));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(key->dst));

		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
	} else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_dissector_key_ipv6_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv6_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
						  f->mask);

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));

		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
	}

	/* Enforce DMAC when offloading incoming tunneled flows.
	 * Flow counters require a match on the DMAC.
	 */
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
	ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				     dmac_47_16), priv->netdev->dev_addr);

	/* let software handle IP fragments */
	MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);

	return 0;
}
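
/* Translate the flower dissector keys into an mlx5 match spec, reporting in
 * *min_inline the minimal headers inline mode the resulting match requires.
 */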
static int __parse_cls_flower(struct mlx5e_priv *priv,
			      struct mlx5_flow_spec *spec,
			      struct tc_cls_flower_offload *f,
			      u8 *min_inline)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	u16 addr_type = 0;
	u8 ip_proto = 0;

	*min_inline = MLX5_INLINE_MODE_L2;

	if (f->dissector->used_keys &
	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL))) {
		netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
			    f->dissector->used_keys);
		return -EOPNOTSUPP;
	}

	if ((dissector_uses_key(f->dissector,
				FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
	     dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
	     dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
	    dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_dissector_key_control *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_CONTROL,
						  f->key);
		switch (key->addr_type) {
		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
			if (parse_tunnel_attr(priv, spec, f))
				return -EOPNOTSUPP;
			break;
		default:
			return -EOPNOTSUPP;
		}

		/* In decap flow, header pointers should point to the inner
		 * headers, outer header were already set by parse_tunnel_attr
		 */
		headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
					 inner_headers);
		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
					 inner_headers);
	}

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_dissector_key_control *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_CONTROL,
						  f->key);
		struct flow_dissector_key_control *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_CONTROL,
						  f->mask);
		addr_type = key->addr_type;

		if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
				 key->flags & FLOW_DIS_IS_FRAGMENT);

			/* the HW doesn't need L3 inline to match on frag=no */
			if (key->flags & FLOW_DIS_IS_FRAGMENT)
				*min_inline = MLX5_INLINE_MODE_IP;
		}
	}

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_dissector_key_basic *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_BASIC,
						  f->key);
		struct flow_dissector_key_basic *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_BASIC,
						  f->mask);
		ip_proto = key->ip_proto;

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
			 ntohs(mask->n_proto));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
			 ntohs(key->n_proto));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
			 mask->ip_proto);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
			 key->ip_proto);

		if (mask->ip_proto)
			*min_inline = MLX5_INLINE_MODE_IP;
	}

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct flow_dissector_key_eth_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
						  f->key);
		struct flow_dissector_key_eth_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
						  f->mask);

		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     dmac_47_16),
				mask->dst);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     dmac_47_16),
				key->dst);

		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     smac_47_16),
				mask->src);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     smac_47_16),
				key->src);
	}

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
		struct flow_dissector_key_vlan *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_VLAN,
						  f->key);
		struct flow_dissector_key_vlan *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_VLAN,
						  f->mask);
		if (mask->vlan_id || mask->vlan_priority) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);

			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);

			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
		}
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_dissector_key_ipv4_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv4_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
						  f->mask);

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &mask->src, sizeof(mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &key->src, sizeof(key->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &mask->dst, sizeof(mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &key->dst, sizeof(key->dst));

		if (mask->src || mask->dst)
			*min_inline = MLX5_INLINE_MODE_IP;
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_dissector_key_ipv6_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv6_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
						  f->mask);

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &mask->src, sizeof(mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &key->src, sizeof(key->src));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &mask->dst, sizeof(mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &key->dst, sizeof(key->dst));

		if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
		    ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
			*min_inline = MLX5_INLINE_MODE_IP;
	}

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_dissector_key_ports *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_PORTS,
						  f->key);
		struct flow_dissector_key_ports *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_PORTS,
						  f->mask);
		switch (ip_proto) {
		case IPPROTO_TCP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_sport, ntohs(mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_sport, ntohs(key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_dport, ntohs(mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_dport, ntohs(key->dst));
			break;

		case IPPROTO_UDP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_sport, ntohs(mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_sport, ntohs(key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_dport, ntohs(mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_dport, ntohs(key->dst));
			break;
		default:
			netdev_err(priv->netdev,
				   "Only UDP and TCP transport are supported\n");
			return -EINVAL;
		}

		if (mask->src || mask->dst)
			*min_inline = MLX5_INLINE_MODE_TCP_UDP;
	}

	return 0;
}
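
/* Wrapper around __parse_cls_flower that also enforces the e-switch minimum
 * inline mode when the flow comes from a VF representor.
 */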
static int parse_cls_flower(struct mlx5e_priv *priv,
			    struct mlx5e_tc_flow *flow,
			    struct mlx5_flow_spec *spec,
			    struct tc_cls_flower_offload *f)
{
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5_eswitch_rep *rep = priv->ppriv;
	u8 min_inline;
	int err;

	err = __parse_cls_flower(priv, spec, f, &min_inline);

	if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) &&
	    rep->vport != FDB_UPLINK_VPORT) {
		if (min_inline > esw->offloads.inline_mode) {
			netdev_warn(priv->netdev,
				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
				    min_inline, esw->offloads.inline_mode);
			return -EOPNOTSUPP;
		}
	}

	return err;
}
static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
				u32 *action, u32 *flow_tag)
{
	const struct tc_action *a;
	LIST_HEAD(actions);

	if (tc_no_actions(exts))
		return -EINVAL;

	*flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
	*action = 0;

	tcf_exts_to_list(exts, &actions);
	list_for_each_entry(a, &actions, list) {
		/* Only support a single action per rule */
		if (*action)
			return -EINVAL;

		if (is_tcf_gact_shot(a)) {
			*action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
			if (MLX5_CAP_FLOWTABLE(priv->mdev,
					       flow_table_properties_nic_receive.flow_counter))
				*action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
			continue;
		}

		if (is_tcf_skbedit_mark(a)) {
			u32 mark = tcf_skbedit_mark(a);

			if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
				netdev_warn(priv->netdev, "Bad flow mark - only 16 bit is supported: 0x%x\n",
					    mark);
				return -EINVAL;
			}

			*flow_tag = mark;
			*action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
			continue;
		}

		return -EINVAL;
	}

	return 0;
}
static inline int cmp_encap_info(struct ip_tunnel_key *a,
				 struct ip_tunnel_key *b)
{
	return memcmp(a, b, sizeof(*a));
}

static inline int hash_encap_info(struct ip_tunnel_key *key)
{
	return jhash(key, sizeof(*key), 0);
}
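
/* Resolve the IPv4 route and neighbour towards the tunnel destination; an
 * egress device on another HW e-switch is replaced by the uplink netdev.
 */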
static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
				   struct net_device *mirred_dev,
				   struct net_device **out_dev,
				   struct flowi4 *fl4,
				   struct neighbour **out_n,
				   int *out_ttl)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct rtable *rt;
	struct neighbour *n = NULL;

#if IS_ENABLED(CONFIG_INET)
	int ret;

	rt = ip_route_output_key(dev_net(mirred_dev), fl4);
	ret = PTR_ERR_OR_ZERO(rt);
	if (ret)
		return ret;
#else
	return -EOPNOTSUPP;
#endif
	/* if the egress device isn't on the same HW e-switch, we use the uplink */
	if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
		*out_dev = mlx5_eswitch_get_uplink_netdev(esw);
	else
		*out_dev = rt->dst.dev;

	*out_ttl = ip4_dst_hoplimit(&rt->dst);
	n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
	ip_rt_put(rt);
	if (!n)
		return -ENOMEM;

	*out_n = n;
	return 0;
}
static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
				   struct net_device *mirred_dev,
				   struct net_device **out_dev,
				   struct flowi6 *fl6,
				   struct neighbour **out_n,
				   int *out_ttl)
{
	struct neighbour *n = NULL;
	struct dst_entry *dst;

#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	int ret;

	dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6);
	ret = dst->error;
	if (ret) {
		dst_release(dst);
		return ret;
	}

	*out_ttl = ip6_dst_hoplimit(dst);

	/* if the egress device isn't on the same HW e-switch, we use the uplink */
	if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
		*out_dev = mlx5_eswitch_get_uplink_netdev(esw);
	else
		*out_dev = dst->dev;
#else
	return -EOPNOTSUPP;
#endif

	n = dst_neigh_lookup(dst, &fl6->daddr);
	dst_release(dst);
	if (!n)
		return -ENOMEM;

	*out_n = n;
	return 0;
}
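
/* Build the Ethernet/IPv4/UDP/VXLAN encap header in buf and return its size.
 * Only the UDP destination port is set; the remaining UDP fields stay zeroed
 * from the memset.
 */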
static int gen_vxlan_header_ipv4(struct net_device *out_dev,
				 char buf[],
				 unsigned char h_dest[ETH_ALEN],
				 int ttl,
				 __be32 daddr,
				 __be32 saddr,
				 __be16 udp_dst_port,
				 __be32 vx_vni)
{
	int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN;
	struct ethhdr *eth = (struct ethhdr *)buf;
	struct iphdr  *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
	struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
	struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));

	memset(buf, 0, encap_size);

	ether_addr_copy(eth->h_dest, h_dest);
	ether_addr_copy(eth->h_source, out_dev->dev_addr);
	eth->h_proto = htons(ETH_P_IP);

	ip->daddr = daddr;
	ip->saddr = saddr;

	ip->ttl = ttl;
	ip->protocol = IPPROTO_UDP;
	ip->version = 0x4;
	ip->ihl = 0x5;

	udp->dest = udp_dst_port;
	vxh->vx_flags = VXLAN_HF_VNI;
	vxh->vx_vni = vxlan_vni_field(vx_vni);

	return encap_size;
}
static int gen_vxlan_header_ipv6(struct net_device *out_dev,
				 char buf[],
				 unsigned char h_dest[ETH_ALEN],
				 int ttl,
				 struct in6_addr *daddr,
				 struct in6_addr *saddr,
				 __be16 udp_dst_port,
				 __be32 vx_vni)
{
	int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN;
	struct ethhdr *eth = (struct ethhdr *)buf;
	struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
	struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
	struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));

	memset(buf, 0, encap_size);

	ether_addr_copy(eth->h_dest, h_dest);
	ether_addr_copy(eth->h_source, out_dev->dev_addr);
	eth->h_proto = htons(ETH_P_IPV6);

	ip6_flow_hdr(ip6h, 0, 0);
	/* the HW fills up ipv6 payload len */
	ip6h->nexthdr = IPPROTO_UDP;
	ip6h->hop_limit = ttl;
	ip6h->daddr = *daddr;
	ip6h->saddr = *saddr;

	udp->dest = udp_dst_port;
	vxh->vx_flags = VXLAN_HF_VNI;
	vxh->vx_vni = vxlan_vni_field(vx_vni);

	return encap_size;
}
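
/* Resolve route and neighbour for the IPv4 tunnel destination, snapshot the
 * neighbour MAC, build the encap header and allocate a HW encap id for it.
 */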
static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
					  struct net_device *mirred_dev,
					  struct mlx5_encap_entry *e,
					  struct net_device **out_dev)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	struct ip_tunnel_key *tun_key = &e->tun_info.key;
	int encap_size, ttl, err;
	struct neighbour *n = NULL;
	struct flowi4 fl4 = {};
	char *encap_header;

	encap_header = kzalloc(max_encap_size, GFP_KERNEL);
	if (!encap_header)
		return -ENOMEM;

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		fl4.flowi4_proto = IPPROTO_UDP;
		fl4.fl4_dport = tun_key->tp_dst;
		break;
	default:
		err = -EOPNOTSUPP;
		goto out;
	}

	fl4.flowi4_tos = tun_key->tos;
	fl4.daddr = tun_key->u.ipv4.dst;
	fl4.saddr = tun_key->u.ipv4.src;

	err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev,
				      &fl4, &n, &ttl);
	if (err)
		goto out;

	if (!(n->nud_state & NUD_VALID)) {
		pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr);
		err = -EOPNOTSUPP;
		goto out;
	}

	e->n = n;
	e->out_dev = *out_dev;

	neigh_ha_snapshot(e->h_dest, n, *out_dev);

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
						   e->h_dest, ttl,
						   fl4.daddr,
						   fl4.saddr, tun_key->tp_dst,
						   tunnel_id_to_key32(tun_key->tun_id));
		break;
	default:
		err = -EOPNOTSUPP;
		goto out;
	}

	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
			       encap_size, encap_header, &e->encap_id);
out:
	kfree(encap_header);
	return err;
}
static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
					  struct net_device *mirred_dev,
					  struct mlx5_encap_entry *e,
					  struct net_device **out_dev)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	struct ip_tunnel_key *tun_key = &e->tun_info.key;
	int encap_size, err, ttl = 0;
	struct neighbour *n = NULL;
	struct flowi6 fl6 = {};
	char *encap_header;

	encap_header = kzalloc(max_encap_size, GFP_KERNEL);
	if (!encap_header)
		return -ENOMEM;

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		fl6.flowi6_proto = IPPROTO_UDP;
		fl6.fl6_dport = tun_key->tp_dst;
		break;
	default:
		err = -EOPNOTSUPP;
		goto out;
	}

	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
	fl6.daddr = tun_key->u.ipv6.dst;
	fl6.saddr = tun_key->u.ipv6.src;

	err = mlx5e_route_lookup_ipv6(priv, mirred_dev, out_dev,
				      &fl6, &n, &ttl);
	if (err)
		goto out;

	if (!(n->nud_state & NUD_VALID)) {
		pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__, &fl6.daddr);
		err = -EOPNOTSUPP;
		goto out;
	}

	e->n = n;
	e->out_dev = *out_dev;

	neigh_ha_snapshot(e->h_dest, n, *out_dev);

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header,
						   e->h_dest, ttl,
						   &fl6.daddr,
						   &fl6.saddr, tun_key->tp_dst,
						   tunnel_id_to_key32(tun_key->tun_id));
		break;
	default:
		err = -EOPNOTSUPP;
		goto out;
	}

	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
			       encap_size, encap_header, &e->encap_id);
out:
	kfree(encap_header);
	return err;
}
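
/* Find or create the encap entry for this tunnel key. Entries are hashed on
 * the e-switch so that flows towards the same tunnel destination share a
 * single hardware encap id.
 */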
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
			      struct ip_tunnel_info *tun_info,
			      struct net_device *mirred_dev,
			      struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned short family = ip_tunnel_info_af(tun_info);
	struct ip_tunnel_key *key = &tun_info->key;
	struct mlx5_encap_entry *e;
	struct net_device *out_dev;
	int tunnel_type, err = -EOPNOTSUPP;
	uintptr_t hash_key;
	bool found = false;

	/* udp dst port must be set */
	if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
		goto vxlan_encap_offload_err;

	/* setting udp src port isn't supported */
	if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
vxlan_encap_offload_err:
		netdev_warn(priv->netdev,
			    "must set udp dst port and not set udp src port\n");
		return -EOPNOTSUPP;
	}

	if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
	    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
		tunnel_type = MLX5_HEADER_TYPE_VXLAN;
	} else {
		netdev_warn(priv->netdev,
			    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(key);

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		if (!cmp_encap_info(&e->tun_info.key, key)) {
			found = true;
			break;
		}
	}

	if (found) {
		attr->encap = e;
		return 0;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return -ENOMEM;

	e->tun_info = *tun_info;
	e->tunnel_type = tunnel_type;
	INIT_LIST_HEAD(&e->flows);

	if (family == AF_INET)
		err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev);
	else if (family == AF_INET6)
		err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e, &out_dev);

	if (err)
		goto out_err;

	attr->encap = e;
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);

	return err;

out_err:
	kfree(e);
	return err;
}
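
/* Translate TC actions for the e-switch case: drop, mirred redirect (direct
 * or through tunnel encap), vlan push/pop and tunnel decap.
 */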
static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
				struct mlx5e_tc_flow *flow)
{
	struct mlx5_esw_flow_attr *attr = flow->attr;
	struct ip_tunnel_info *info = NULL;
	const struct tc_action *a;
	LIST_HEAD(actions);
	bool encap = false;
	int err = 0;

	if (tc_no_actions(exts))
		return -EINVAL;

	memset(attr, 0, sizeof(*attr));
	attr->in_rep = priv->ppriv;

	tcf_exts_to_list(exts, &actions);
	list_for_each_entry(a, &actions, list) {
		if (is_tcf_gact_shot(a)) {
			attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
					MLX5_FLOW_CONTEXT_ACTION_COUNT;
			continue;
		}

		if (is_tcf_mirred_egress_redirect(a)) {
			int ifindex = tcf_mirred_ifindex(a);
			struct net_device *out_dev;
			struct mlx5e_priv *out_priv;

			out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);

			if (switchdev_port_same_parent_id(priv->netdev,
							  out_dev)) {
				attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
					MLX5_FLOW_CONTEXT_ACTION_COUNT;
				out_priv = netdev_priv(out_dev);
				attr->out_rep = out_priv->ppriv;
			} else if (encap) {
				err = mlx5e_attach_encap(priv, info,
							 out_dev, attr);
				if (err)
					return err;
				list_add(&flow->encap, &attr->encap->flows);
				attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
					MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
					MLX5_FLOW_CONTEXT_ACTION_COUNT;
				out_priv = netdev_priv(attr->encap->out_dev);
				attr->out_rep = out_priv->ppriv;
			} else {
				pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
				       priv->netdev->name, out_dev->name);
				return -EINVAL;
			}
			continue;
		}

		if (is_tcf_tunnel_set(a)) {
			info = tcf_tunnel_info(a);
			if (info)
				encap = true;
			else
				return -EOPNOTSUPP;
			continue;
		}

		if (is_tcf_vlan(a)) {
			if (tcf_vlan_action(a) == VLAN_F_POP) {
				attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
			} else if (tcf_vlan_action(a) == VLAN_F_PUSH) {
				if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
					return -EOPNOTSUPP;

				attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
				attr->vlan = tcf_vlan_push_vid(a);
			} else {
				return -EOPNOTSUPP;
			}
			continue;
		}

		if (is_tcf_tunnel_release(a)) {
			attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
			continue;
		}

		return -EINVAL;
	}
	return err;
}
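
/* Add a flower classifier: parse the match and the actions, install the rule
 * in hardware and track the flow by its TC cookie.
 */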
int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
			   struct tc_cls_flower_offload *f)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	int err, attr_size = 0;
	u32 flow_tag, action;
	struct mlx5e_tc_flow *flow;
	struct mlx5_flow_spec *spec;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	u8 flow_flags = 0;

	if (esw && esw->mode == SRIOV_OFFLOADS) {
		flow_flags = MLX5E_TC_FLOW_ESWITCH;
		attr_size = sizeof(struct mlx5_esw_flow_attr);
	}

	flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
	spec = mlx5_vzalloc(sizeof(*spec));
	if (!spec || !flow) {
		err = -ENOMEM;
		goto err_free;
	}

	flow->cookie = f->cookie;
	flow->flags = flow_flags;

	err = parse_cls_flower(priv, flow, spec, f);
	if (err < 0)
		goto err_free;

	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
		flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1);
		err = parse_tc_fdb_actions(priv, f->exts, flow);
		if (err < 0)
			goto err_free;
		flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr);
	} else {
		err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag);
		if (err < 0)
			goto err_free;
		flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag);
	}

	if (IS_ERR(flow->rule)) {
		err = PTR_ERR(flow->rule);
		goto err_del_rule;
	}

	err = rhashtable_insert_fast(&tc->ht, &flow->node,
				     tc->ht_params);
	if (err)
		goto err_del_rule;

	goto out;

err_del_rule:
	mlx5e_tc_del_flow(priv, flow);

err_free:
	kfree(flow);
out:
	kvfree(spec);
	return err;
}
int mlx5e_delete_flower(struct mlx5e_priv *priv,
			struct tc_cls_flower_offload *f)
{
	struct mlx5e_tc_flow *flow;
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
				      tc->ht_params);
	if (!flow)
		return -EINVAL;

	rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);

	mlx5e_tc_del_flow(priv, flow);

	kfree(flow);

	return 0;
}
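
/* Report the cached hardware counter values of the flow back to TC. */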
int mlx5e_stats_flower(struct mlx5e_priv *priv,
		       struct tc_cls_flower_offload *f)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	struct mlx5e_tc_flow *flow;
	struct tc_action *a;
	struct mlx5_fc *counter;
	LIST_HEAD(actions);
	u64 bytes;
	u64 packets;
	u64 lastuse;

	flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
				      tc->ht_params);
	if (!flow)
		return -EINVAL;

	counter = mlx5_flow_rule_counter(flow->rule);
	if (!counter)
		return 0;

	mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);

	preempt_disable();

	tcf_exts_to_list(f->exts, &actions);
	list_for_each_entry(a, &actions, list)
		tcf_action_stats_update(a, bytes, packets, lastuse);

	preempt_enable();

	return 0;
}
static const struct rhashtable_params mlx5e_tc_flow_ht_params = {
	.head_offset = offsetof(struct mlx5e_tc_flow, node),
	.key_offset = offsetof(struct mlx5e_tc_flow, cookie),
	.key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
	.automatic_shrinking = true,
};
int mlx5e_tc_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	tc->ht_params = mlx5e_tc_flow_ht_params;
	return rhashtable_init(&tc->ht, &tc->ht_params);
}
static void _mlx5e_tc_del_flow(void *ptr, void *arg)
{
	struct mlx5e_tc_flow *flow = ptr;
	struct mlx5e_priv *priv = arg;

	mlx5e_tc_del_flow(priv, flow);
	kfree(flow);
}
void mlx5e_tc_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, priv);

	if (!IS_ERR_OR_NULL(tc->t)) {
		mlx5_destroy_flow_table(tc->t);
		tc->t = NULL;
	}
}