/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <net/flow_dissector.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <net/switchdev.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/vxlan.h>
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"
#include "vxlan.h"
struct mlx5e_tc_flow {
	struct rhash_head	node;
	u64			cookie;
	struct mlx5_flow_handle *rule;
	struct list_head	encap; /* flows sharing the same encap */
	struct mlx5_esw_flow_attr *attr;
};

enum {
	MLX5_HEADER_TYPE_VXLAN = 0x0,
	MLX5_HEADER_TYPE_NVGRE = 0x1,
};
#define MLX5E_TC_TABLE_NUM_ENTRIES 1024
#define MLX5E_TC_TABLE_NUM_GROUPS 4
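/* Add a flow matched by @spec to the NIC RX TC flow table, creating the
 * table on first use. Forwarded flows go to the vlan flow table; drop
 * flows may also attach a flow counter when the device supports one.
 */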
static struct mlx5_flow_handle *
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
		      struct mlx5_flow_spec *spec,
		      u32 action, u32 flow_tag)
{
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_flow_destination dest = { 0 };
	struct mlx5_flow_act flow_act = {
		.action = action,
		.flow_tag = flow_tag,
		.encap_id = 0,
	};
	struct mlx5_fc *counter = NULL;
	struct mlx5_flow_handle *rule;
	bool table_created = false;

	if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
		dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest.ft = priv->fs.vlan.ft.t;
	} else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		counter = mlx5_fc_create(dev, true);
		if (IS_ERR(counter))
			return ERR_CAST(counter);

		dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
		dest.counter = counter;
	}

	if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
		priv->fs.tc.t =
			mlx5_create_auto_grouped_flow_table(priv->fs.ns,
							    MLX5E_TC_PRIO,
							    MLX5E_TC_TABLE_NUM_ENTRIES,
							    MLX5E_TC_TABLE_NUM_GROUPS,
							    0);
		if (IS_ERR(priv->fs.tc.t)) {
			netdev_err(priv->netdev,
				   "Failed to create tc offload table\n");
			rule = ERR_CAST(priv->fs.tc.t);
			goto err_create_ft;
		}

		table_created = true;
	}

	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
	rule = mlx5_add_flow_rules(priv->fs.tc.t, spec, &flow_act, &dest, 1);
	if (IS_ERR(rule))
		goto err_add_rule;

	return rule;

err_add_rule:
	if (table_created) {
		mlx5_destroy_flow_table(priv->fs.tc.t);
		priv->fs.tc.t = NULL;
	}
err_create_ft:
	mlx5_fc_destroy(dev, counter);

	return rule;
}
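/* Offload a flow into the eswitch FDB: apply any vlan push/pop action
 * first, then install the offloaded steering rule.
 */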
static struct mlx5_flow_handle *
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
		      struct mlx5_flow_spec *spec,
		      struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	int err;

	err = mlx5_eswitch_add_vlan_action(esw, attr);
	if (err)
		return ERR_PTR(err);

	return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
}
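/* Drop this flow's reference on its encap entry; the last flow sharing
 * the entry also frees the HW encap id, the neighbour reference and the
 * entry itself.
 */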
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
			       struct mlx5e_tc_flow *flow)
{
	struct list_head *next = flow->encap.next;

	list_del(&flow->encap);
	if (list_empty(next)) {
		struct mlx5_encap_entry *e;

		e = list_entry(next, struct mlx5_encap_entry, flows);
		if (e->n) {
			mlx5_encap_dealloc(priv->mdev, e->encap_id);
			neigh_release(e->n);
		}
		hlist_del_rcu(&e->encap_hlist);
		kfree(e);
	}
}
/* we get here also when setting rule to the FW failed, etc. It means that the
 * flow rule itself might not exist, but some offloading related to the actions
 * should be cleaned.
 */
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_fc *counter = NULL;

	if (!IS_ERR(flow->rule)) {
		counter = mlx5_flow_rule_counter(flow->rule);
		mlx5_del_flow_rules(flow->rule);
		mlx5_fc_destroy(priv->mdev, counter);
	}

	if (esw && esw->mode == SRIOV_OFFLOADS) {
		mlx5_eswitch_del_vlan_action(esw, flow->attr);
		if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
			mlx5e_detach_encap(priv, flow);
	}

	if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
		mlx5_destroy_flow_table(priv->fs.tc.t);
		priv->fs.tc.t = NULL;
	}
}
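/* Translate the flower tunnel match into a vxlan match: UDP as the outer
 * IP protocol plus, when given, the VNI taken from the ENC_KEYID key.
 */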
static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
			     struct tc_cls_flower_offload *f)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				    misc_parameters);
	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				    misc_parameters);

	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
		struct flow_dissector_key_keyid *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_KEYID,
						  f->key);
		struct flow_dissector_key_keyid *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_KEYID,
						  f->mask);
		MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
			 be32_to_cpu(mask->keyid));
		MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
			 be32_to_cpu(key->keyid));
	}
}
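/* Build the outer-header match for decap flows: tunnel UDP ports, outer
 * IPv4/IPv6 addresses, our DMAC, and no IP fragments. Only vxlan with a
 * fully-masked, offloaded UDP dst port is supported.
 */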
static int parse_tunnel_attr(struct mlx5e_priv *priv,
			     struct mlx5_flow_spec *spec,
			     struct tc_cls_flower_offload *f)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);

	struct flow_dissector_key_control *enc_control =
		skb_flow_dissector_target(f->dissector,
					  FLOW_DISSECTOR_KEY_ENC_CONTROL,
					  f->key);

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
		struct flow_dissector_key_ports *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_PORTS,
						  f->key);
		struct flow_dissector_key_ports *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_PORTS,
						  f->mask);

		/* Full udp dst port must be given */
		if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
			goto vxlan_match_offload_err;

		if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) &&
		    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
			parse_vxlan_attr(spec, f);
		else {
			netdev_warn(priv->netdev,
				    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
			return -EOPNOTSUPP;
		}

		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 udp_dport, ntohs(mask->dst));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 udp_dport, ntohs(key->dst));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 udp_sport, ntohs(mask->src));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 udp_sport, ntohs(key->src));
	} else { /* udp dst port must be given */
vxlan_match_offload_err:
		netdev_warn(priv->netdev,
			    "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
		return -EOPNOTSUPP;
	}

	if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_dissector_key_ipv4_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv4_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
						  f->mask);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 src_ipv4_src_ipv6.ipv4_layout.ipv4,
			 ntohl(mask->src));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 src_ipv4_src_ipv6.ipv4_layout.ipv4,
			 ntohl(key->src));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(mask->dst));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(key->dst));

		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
	} else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_dissector_key_ipv6_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv6_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
						  f->mask);

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));

		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
	}

	/* Enforce DMAC when offloading incoming tunneled flows.
	 * Flow counters require a match on the DMAC.
	 */
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
	ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				     dmac_47_16), priv->netdev->dev_addr);

	/* let software handle IP fragments */
	MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);

	return 0;
}
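/* Translate a flower match into mlx5 match criteria/values. For decap
 * flows the outer headers are set by parse_tunnel_attr() and the header
 * pointers are re-aimed at the inner headers. Also reports the minimal
 * vport inline mode (L2/IP/TCP_UDP) the match requires.
 */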
static int __parse_cls_flower(struct mlx5e_priv *priv,
			      struct mlx5_flow_spec *spec,
			      struct tc_cls_flower_offload *f,
			      u8 *min_inline)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	u16 addr_type = 0;
	u8 ip_proto = 0;

	*min_inline = MLX5_INLINE_MODE_L2;

	if (f->dissector->used_keys &
	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL))) {
		netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
			    f->dissector->used_keys);
		return -EOPNOTSUPP;
	}

	if ((dissector_uses_key(f->dissector,
				FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
	     dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
	     dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
	    dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_dissector_key_control *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_CONTROL,
						  f->key);
		switch (key->addr_type) {
		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
			if (parse_tunnel_attr(priv, spec, f))
				return -EOPNOTSUPP;
			break;
		default:
			return -EOPNOTSUPP;
		}

		/* In decap flow, header pointers should point to the inner
		 * headers, outer header were already set by parse_tunnel_attr
		 */
		headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
					 inner_headers);
		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
					 inner_headers);
	}

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_dissector_key_control *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_CONTROL,
						  f->key);

		struct flow_dissector_key_control *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_CONTROL,
						  f->mask);
		addr_type = key->addr_type;

		if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
				 key->flags & FLOW_DIS_IS_FRAGMENT);

			/* the HW doesn't need L3 inline to match on frag=no */
			if (key->flags & FLOW_DIS_IS_FRAGMENT)
				*min_inline = MLX5_INLINE_MODE_IP;
		}
	}

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_dissector_key_basic *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_BASIC,
						  f->key);
		struct flow_dissector_key_basic *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_BASIC,
						  f->mask);
		ip_proto = key->ip_proto;

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
			 ntohs(mask->n_proto));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
			 ntohs(key->n_proto));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
			 mask->ip_proto);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
			 key->ip_proto);

		if (mask->ip_proto)
			*min_inline = MLX5_INLINE_MODE_IP;
	}

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct flow_dissector_key_eth_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
						  f->key);
		struct flow_dissector_key_eth_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
						  f->mask);

		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     dmac_47_16),
				mask->dst);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     dmac_47_16),
				key->dst);

		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     smac_47_16),
				mask->src);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     smac_47_16),
				key->src);
	}

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
		struct flow_dissector_key_vlan *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_VLAN,
						  f->key);
		struct flow_dissector_key_vlan *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_VLAN,
						  f->mask);
		if (mask->vlan_id || mask->vlan_priority) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);

			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);

			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
		}
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_dissector_key_ipv4_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv4_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
						  f->mask);

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &mask->src, sizeof(mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &key->src, sizeof(key->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &mask->dst, sizeof(mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &key->dst, sizeof(key->dst));

		if (mask->src || mask->dst)
			*min_inline = MLX5_INLINE_MODE_IP;
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_dissector_key_ipv6_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv6_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
						  f->mask);

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &mask->src, sizeof(mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &key->src, sizeof(key->src));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &mask->dst, sizeof(mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &key->dst, sizeof(key->dst));

		if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
		    ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
			*min_inline = MLX5_INLINE_MODE_IP;
	}

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_dissector_key_ports *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_PORTS,
						  f->key);
		struct flow_dissector_key_ports *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_PORTS,
						  f->mask);
		switch (ip_proto) {
		case IPPROTO_TCP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_sport, ntohs(mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_sport, ntohs(key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_dport, ntohs(mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_dport, ntohs(key->dst));
			break;
		case IPPROTO_UDP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_sport, ntohs(mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_sport, ntohs(key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_dport, ntohs(mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_dport, ntohs(key->dst));
			break;
		default:
			netdev_err(priv->netdev,
				   "Only UDP and TCP transport are supported\n");
			return -EINVAL;
		}

		if (mask->src || mask->dst)
			*min_inline = MLX5_INLINE_MODE_TCP_UDP;
	}

	return 0;
}
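/* Parse the match and, for eswitch VF representor flows, reject it when
 * the configured eswitch inline mode is smaller than the match requires.
 */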
static int parse_cls_flower(struct mlx5e_priv *priv,
			    struct mlx5_flow_spec *spec,
			    struct tc_cls_flower_offload *f)
{
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5_eswitch_rep *rep = priv->ppriv;
	u8 min_inline;
	int err;

	err = __parse_cls_flower(priv, spec, f, &min_inline);

	if (!err && esw->mode == SRIOV_OFFLOADS &&
	    rep->vport != FDB_UPLINK_VPORT) {
		if (min_inline > esw->offloads.inline_mode) {
			netdev_warn(priv->netdev,
				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
				    min_inline, esw->offloads.inline_mode);
			return -EOPNOTSUPP;
		}
	}

	return err;
}
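/* Parse TC actions for NIC (non-eswitch) flows: gact drop (counted when
 * the device supports flow counters) and skbedit mark, which becomes the
 * flow tag.
 */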
static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
				u32 *action, u32 *flow_tag)
{
	const struct tc_action *a;
	LIST_HEAD(actions);

	if (tc_no_actions(exts))
		return -EINVAL;

	*flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
	*action = 0;

	tcf_exts_to_list(exts, &actions);
	list_for_each_entry(a, &actions, list) {
		/* Only support a single action per rule */
		if (*action)
			return -EINVAL;

		if (is_tcf_gact_shot(a)) {
			*action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
			if (MLX5_CAP_FLOWTABLE(priv->mdev,
					       flow_table_properties_nic_receive.flow_counter))
				*action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
			continue;
		}

		if (is_tcf_skbedit_mark(a)) {
			u32 mark = tcf_skbedit_mark(a);

			if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
				netdev_warn(priv->netdev, "Bad flow mark - only 16 bit is supported: 0x%x\n",
					    mark);
				return -EINVAL;
			}

			*flow_tag = mark;
			*action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
			continue;
		}

		return -EINVAL;
	}

	return 0;
}
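/* Encap entries are kept in a hash table keyed by the tunnel key; two
 * flows share an entry only when their full ip_tunnel_key matches.
 */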
static inline int cmp_encap_info(struct ip_tunnel_key *a,
				 struct ip_tunnel_key *b)
{
	return memcmp(a, b, sizeof(*a));
}

static inline int hash_encap_info(struct ip_tunnel_key *key)
{
	return jhash(key, sizeof(*key), 0);
}
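/* Resolve the route and neighbour for the tunnel destination; when the
 * egress device is not on our eswitch, the uplink netdev is used instead.
 * The IPv6 counterpart follows.
 */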
static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
				   struct net_device *mirred_dev,
				   struct net_device **out_dev,
				   struct flowi4 *fl4,
				   struct neighbour **out_n,
				   int *out_ttl)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct rtable *rt;
	struct neighbour *n = NULL;

#if IS_ENABLED(CONFIG_INET)
	int ret;

	rt = ip_route_output_key(dev_net(mirred_dev), fl4);
	ret = PTR_ERR_OR_ZERO(rt);
	if (ret)
		return ret;
#else
	return -EOPNOTSUPP;
#endif
	/* if the egress device isn't on the same HW e-switch, we use the uplink */
	if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
		*out_dev = mlx5_eswitch_get_uplink_netdev(esw);
	else
		*out_dev = rt->dst.dev;

	*out_ttl = ip4_dst_hoplimit(&rt->dst);
	n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
	ip_rt_put(rt);
	if (!n)
		return -ENOMEM;

	*out_n = n;
	return 0;
}
static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
				   struct net_device *mirred_dev,
				   struct net_device **out_dev,
				   struct flowi6 *fl6,
				   struct neighbour **out_n,
				   int *out_ttl)
{
	struct neighbour *n = NULL;
	struct dst_entry *dst;

#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	int ret;

	dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6);
	ret = dst->error;
	if (ret) {
		dst_release(dst);
		return ret;
	}

	*out_ttl = ip6_dst_hoplimit(dst);

	/* if the egress device isn't on the same HW e-switch, we use the uplink */
	if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
		*out_dev = mlx5_eswitch_get_uplink_netdev(esw);
	else
		*out_dev = dst->dev;
#else
	return -EOPNOTSUPP;
#endif

	n = dst_neigh_lookup(dst, &fl6->daddr);
	dst_release(dst);
	if (!n)
		return -ENOMEM;

	*out_n = n;
	return 0;
}
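/* Write the complete encap headers (ETH/IP/UDP/VXLAN) into @buf and
 * return the header size. The IPv6 variant below mirrors this layout.
 */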
static int gen_vxlan_header_ipv4(struct net_device *out_dev,
				 char buf[],
				 unsigned char h_dest[ETH_ALEN],
				 int ttl,
				 __be32 daddr,
				 __be32 saddr,
				 __be16 udp_dst_port,
				 __be32 vx_vni)
{
	int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN;
	struct ethhdr *eth = (struct ethhdr *)buf;
	struct iphdr  *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
	struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
	struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));

	memset(buf, 0, encap_size);

	ether_addr_copy(eth->h_dest, h_dest);
	ether_addr_copy(eth->h_source, out_dev->dev_addr);
	eth->h_proto = htons(ETH_P_IP);

	ip->daddr = daddr;
	ip->saddr = saddr;

	ip->ttl = ttl;
	ip->protocol = IPPROTO_UDP;
	ip->version = 0x4;
	ip->ihl = 0x5;

	udp->dest = udp_dst_port;
	vxh->vx_flags = VXLAN_HF_VNI;
	vxh->vx_vni = vxlan_vni_field(vx_vni);

	return encap_size;
}
static int gen_vxlan_header_ipv6(struct net_device *out_dev,
				 char buf[],
				 unsigned char h_dest[ETH_ALEN],
				 int ttl,
				 struct in6_addr *daddr,
				 struct in6_addr *saddr,
				 __be16 udp_dst_port,
				 __be32 vx_vni)
{
	int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN;
	struct ethhdr *eth = (struct ethhdr *)buf;
	struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
	struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
	struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));

	memset(buf, 0, encap_size);

	ether_addr_copy(eth->h_dest, h_dest);
	ether_addr_copy(eth->h_source, out_dev->dev_addr);
	eth->h_proto = htons(ETH_P_IPV6);

	ip6_flow_hdr(ip6h, 0, 0);
	/* the HW fills up ipv6 payload len */
	ip6h->nexthdr = IPPROTO_UDP;
	ip6h->hop_limit = ttl;
	ip6h->daddr = *daddr;
	ip6h->saddr = *saddr;

	udp->dest = udp_dst_port;
	vxh->vx_flags = VXLAN_HF_VNI;
	vxh->vx_vni = vxlan_vni_field(vx_vni);

	return encap_size;
}
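/* Resolve route and neighbour for the tunnel destination, snapshot the
 * neighbour MAC, generate the encap header and allocate the HW encap id.
 * The IPv6 variant below mirrors this flow.
 */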
static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
					  struct net_device *mirred_dev,
					  struct mlx5_encap_entry *e,
					  struct net_device **out_dev)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	struct ip_tunnel_key *tun_key = &e->tun_info.key;
	int encap_size, ttl, err;
	struct neighbour *n = NULL;
	struct flowi4 fl4 = {};
	char *encap_header;

	encap_header = kzalloc(max_encap_size, GFP_KERNEL);
	if (!encap_header)
		return -ENOMEM;

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		fl4.flowi4_proto = IPPROTO_UDP;
		fl4.fl4_dport = tun_key->tp_dst;
		break;
	default:
		err = -EOPNOTSUPP;
		goto out;
	}

	fl4.flowi4_tos = tun_key->tos;
	fl4.daddr = tun_key->u.ipv4.dst;
	fl4.saddr = tun_key->u.ipv4.src;

	err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev,
				      &fl4, &n, &ttl);
	if (err)
		goto out;

	if (!(n->nud_state & NUD_VALID)) {
		pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr);
		err = -EOPNOTSUPP;
		goto out;
	}

	e->n = n;
	e->out_dev = *out_dev;

	neigh_ha_snapshot(e->h_dest, n, *out_dev);

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
						   e->h_dest, ttl,
						   fl4.daddr,
						   fl4.saddr, tun_key->tp_dst,
						   tunnel_id_to_key32(tun_key->tun_id));
		break;
	default:
		err = -EOPNOTSUPP;
		goto out;
	}

	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
			       encap_size, encap_header, &e->encap_id);
out:
	if (err && n)
		neigh_release(n);
	kfree(encap_header);
	return err;
}
static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
					  struct net_device *mirred_dev,
					  struct mlx5_encap_entry *e,
					  struct net_device **out_dev)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	struct ip_tunnel_key *tun_key = &e->tun_info.key;
	int encap_size, err, ttl = 0;
	struct neighbour *n = NULL;
	struct flowi6 fl6 = {};
	char *encap_header;

	encap_header = kzalloc(max_encap_size, GFP_KERNEL);
	if (!encap_header)
		return -ENOMEM;

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		fl6.flowi6_proto = IPPROTO_UDP;
		fl6.fl6_dport = tun_key->tp_dst;
		break;
	default:
		err = -EOPNOTSUPP;
		goto out;
	}

	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
	fl6.daddr = tun_key->u.ipv6.dst;
	fl6.saddr = tun_key->u.ipv6.src;

	err = mlx5e_route_lookup_ipv6(priv, mirred_dev, out_dev,
				      &fl6, &n, &ttl);
	if (err)
		goto out;

	if (!(n->nud_state & NUD_VALID)) {
		pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__, &fl6.daddr);
		err = -EOPNOTSUPP;
		goto out;
	}

	e->n = n;
	e->out_dev = *out_dev;

	neigh_ha_snapshot(e->h_dest, n, *out_dev);

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header,
						   e->h_dest, ttl,
						   &fl6.daddr,
						   &fl6.saddr, tun_key->tp_dst,
						   tunnel_id_to_key32(tun_key->tun_id));
		break;
	default:
		err = -EOPNOTSUPP;
		goto out;
	}

	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
			       encap_size, encap_header, &e->encap_id);
out:
	if (err && n)
		neigh_release(n);
	kfree(encap_header);
	return err;
}
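/* Find or create the encap entry for this tunnel key and attach it to
 * the eswitch flow attributes.
 */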
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
			      struct ip_tunnel_info *tun_info,
			      struct net_device *mirred_dev,
			      struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned short family = ip_tunnel_info_af(tun_info);
	struct ip_tunnel_key *key = &tun_info->key;
	struct mlx5_encap_entry *e;
	struct net_device *out_dev;
	int tunnel_type, err = -EOPNOTSUPP;
	uintptr_t hash_key;
	bool found = false;

	/* udp dst port must be set */
	if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
		goto vxlan_encap_offload_err;

	/* setting udp src port isn't supported */
	if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
vxlan_encap_offload_err:
		netdev_warn(priv->netdev,
			    "must set udp dst port and not set udp src port\n");
		return -EOPNOTSUPP;
	}

	if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
	    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
		tunnel_type = MLX5_HEADER_TYPE_VXLAN;
	} else {
		netdev_warn(priv->netdev,
			    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(key);

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		if (!cmp_encap_info(&e->tun_info.key, key)) {
			found = true;
			break;
		}
	}

	if (found) {
		attr->encap = e;
		return 0;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return -ENOMEM;

	e->tun_info = *tun_info;
	e->tunnel_type = tunnel_type;
	INIT_LIST_HEAD(&e->flows);

	if (family == AF_INET)
		err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev);
	else if (family == AF_INET6)
		err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e, &out_dev);

	if (err)
		goto out_err;

	attr->encap = e;
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);

	return err;

out_err:
	kfree(e);
	return err;
}
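/* Parse TC actions for eswitch (FDB) flows: drop, mirred redirect within
 * the same eswitch (optionally through a tunnel encap set earlier in the
 * action list), vlan push/pop and tunnel decap.
 */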
static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
				struct mlx5e_tc_flow *flow)
{
	struct mlx5_esw_flow_attr *attr = flow->attr;
	struct ip_tunnel_info *info = NULL;
	const struct tc_action *a;
	LIST_HEAD(actions);
	bool encap = false;
	int err;

	if (tc_no_actions(exts))
		return -EINVAL;

	memset(attr, 0, sizeof(*attr));
	attr->in_rep = priv->ppriv;

	tcf_exts_to_list(exts, &actions);
	list_for_each_entry(a, &actions, list) {
		if (is_tcf_gact_shot(a)) {
			attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
					MLX5_FLOW_CONTEXT_ACTION_COUNT;
			continue;
		}

		if (is_tcf_mirred_egress_redirect(a)) {
			int ifindex = tcf_mirred_ifindex(a);
			struct net_device *out_dev;
			struct mlx5e_priv *out_priv;

			out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);

			if (switchdev_port_same_parent_id(priv->netdev,
							  out_dev)) {
				attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
						MLX5_FLOW_CONTEXT_ACTION_COUNT;
				out_priv = netdev_priv(out_dev);
				attr->out_rep = out_priv->ppriv;
			} else if (encap) {
				err = mlx5e_attach_encap(priv, info,
							 out_dev, attr);
				if (err)
					return err;
				list_add(&flow->encap, &attr->encap->flows);
				attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
						MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
						MLX5_FLOW_CONTEXT_ACTION_COUNT;
				out_priv = netdev_priv(attr->encap->out_dev);
				attr->out_rep = out_priv->ppriv;
			} else {
				pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
				       priv->netdev->name, out_dev->name);
				return -EINVAL;
			}
			continue;
		}

		if (is_tcf_tunnel_set(a)) {
			info = tcf_tunnel_info(a);
			if (info)
				encap = true;
			else
				return -EOPNOTSUPP;
			continue;
		}

		if (is_tcf_vlan(a)) {
			if (tcf_vlan_action(a) == VLAN_F_POP) {
				attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
			} else if (tcf_vlan_action(a) == VLAN_F_PUSH) {
				if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
					return -EOPNOTSUPP;

				attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
				attr->vlan = tcf_vlan_push_vid(a);
			}
			continue;
		}

		if (is_tcf_tunnel_release(a)) {
			attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
			continue;
		}

		return -EINVAL;
	}
	return 0;
}
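/* Offload a new flower classifier instance: parse the match and actions,
 * install the HW rule (FDB rule in switchdev mode, NIC rule otherwise)
 * and track the flow by its TC cookie in the hash table.
 */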
int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
			   struct tc_cls_flower_offload *f)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	int err = 0;
	bool fdb_flow = false;
	u32 flow_tag, action;
	struct mlx5e_tc_flow *flow;
	struct mlx5_flow_spec *spec;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (esw && esw->mode == SRIOV_OFFLOADS)
		fdb_flow = true;

	if (fdb_flow)
		flow = kzalloc(sizeof(*flow) +
			       sizeof(struct mlx5_esw_flow_attr),
			       GFP_KERNEL);
	else
		flow = kzalloc(sizeof(*flow), GFP_KERNEL);

	spec = mlx5_vzalloc(sizeof(*spec));
	if (!spec || !flow) {
		err = -ENOMEM;
		goto err_free;
	}

	flow->cookie = f->cookie;

	err = parse_cls_flower(priv, spec, f);
	if (err < 0)
		goto err_free;

	if (fdb_flow) {
		flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1);
		err = parse_tc_fdb_actions(priv, f->exts, flow);
		if (err < 0)
			goto err_free;
		flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr);
	} else {
		err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag);
		if (err < 0)
			goto err_free;
		flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag);
	}

	if (IS_ERR(flow->rule)) {
		err = PTR_ERR(flow->rule);
		goto err_free;
	}

	err = rhashtable_insert_fast(&tc->ht, &flow->node,
				     tc->ht_params);
	if (err)
		goto err_del_rule;

	goto out;

err_del_rule:
	mlx5e_tc_del_flow(priv, flow);

err_free:
	kfree(flow);
out:
	kvfree(spec);
	return err;
}
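/* Remove an offloaded flow by its TC cookie and release its HW resources. */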
int mlx5e_delete_flower(struct mlx5e_priv *priv,
			struct tc_cls_flower_offload *f)
{
	struct mlx5e_tc_flow *flow;
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
				      tc->ht_params);
	if (!flow)
		return -EINVAL;

	rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);

	mlx5e_tc_del_flow(priv, flow);

	kfree(flow);

	return 0;
}
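/* Report cached HW counter stats (bytes/packets/lastuse) back into the
 * flow's TC actions.
 */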
int mlx5e_stats_flower(struct mlx5e_priv *priv,
		       struct tc_cls_flower_offload *f)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	struct mlx5e_tc_flow *flow;
	struct tc_action *a;
	struct mlx5_fc *counter;
	LIST_HEAD(actions);
	u64 bytes;
	u64 packets;
	u64 lastuse;

	flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
				      tc->ht_params);
	if (!flow)
		return -EINVAL;

	counter = mlx5_flow_rule_counter(flow->rule);
	if (!counter)
		return 0;

	mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);

	tcf_exts_to_list(f->exts, &actions);
	list_for_each_entry(a, &actions, list)
		tcf_action_stats_update(a, bytes, packets, lastuse);

	return 0;
}
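/* Flows are hashed by the TC cookie; this is the key used by the
 * rhashtable lookups above.
 */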
static const struct rhashtable_params mlx5e_tc_flow_ht_params = {
	.head_offset = offsetof(struct mlx5e_tc_flow, node),
	.key_offset = offsetof(struct mlx5e_tc_flow, cookie),
	.key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
	.automatic_shrinking = true,
};
int mlx5e_tc_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	tc->ht_params = mlx5e_tc_flow_ht_params;
	return rhashtable_init(&tc->ht, &tc->ht_params);
}
static void _mlx5e_tc_del_flow(void *ptr, void *arg)
{
	struct mlx5e_tc_flow *flow = ptr;
	struct mlx5e_priv *priv = arg;

	mlx5e_tc_del_flow(priv, flow);
	kfree(flow);
}
void mlx5e_tc_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, priv);

	if (!IS_ERR_OR_NULL(tc->t)) {
		mlx5_destroy_flow_table(tc->t);
		tc->t = NULL;
	}
}