2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <net/flow_dissector.h>
34 #include <net/flow_offload.h>
35 #include <net/sch_generic.h>
36 #include <net/pkt_cls.h>
37 #include <linux/mlx5/fs.h>
38 #include <linux/mlx5/device.h>
39 #include <linux/rhashtable.h>
40 #include <linux/refcount.h>
41 #include <linux/completion.h>
42 #include <net/tc_act/tc_pedit.h>
43 #include <net/tc_act/tc_csum.h>
44 #include <net/psample.h>
46 #include <net/ipv6_stubs.h>
47 #include <net/bareudp.h>
48 #include <net/bonding.h>
50 #include "en/tc/post_act.h"
52 #include "en/rep/tc.h"
53 #include "en/rep/neigh.h"
58 #include "en/tc_tun.h"
59 #include "en/mapping.h"
61 #include "en/mod_hdr.h"
62 #include "en/tc_tun_encap.h"
63 #include "en/tc/sample.h"
64 #include "lib/devcom.h"
65 #include "lib/geneve.h"
66 #include "lib/fs_chains.h"
67 #include "diag/en_tc_tracepoint.h"
68 #include <asm/div64.h>
72 #define nic_chains(priv) ((priv)->fs.tc.chains)
73 #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
75 #define MLX5E_TC_TABLE_NUM_GROUPS 4
76 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
78 struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings
[] = {
80 .mfield
= MLX5_ACTION_IN_FIELD_METADATA_REG_C_0
,
85 .mfield
= MLX5_ACTION_IN_FIELD_METADATA_REG_C_0
,
90 .mfield
= MLX5_ACTION_IN_FIELD_METADATA_REG_C_1
,
92 .mlen
= ESW_TUN_OPTS_BITS
+ ESW_TUN_ID_BITS
,
93 .soffset
= MLX5_BYTE_OFF(fte_match_param
,
94 misc_parameters_2
.metadata_reg_c_1
),
96 [ZONE_TO_REG
] = zone_to_reg_ct
,
97 [ZONE_RESTORE_TO_REG
] = zone_restore_to_reg_ct
,
98 [CTSTATE_TO_REG
] = ctstate_to_reg_ct
,
99 [MARK_TO_REG
] = mark_to_reg_ct
,
100 [LABELS_TO_REG
] = labels_to_reg_ct
,
101 [FTEID_TO_REG
] = fteid_to_reg_ct
,
102 /* For NIC rules we store the restore metadata directly
103 * into reg_b that is passed to SW since we don't
104 * jump between steering domains.
106 [NIC_CHAIN_TO_REG
] = {
107 .mfield
= MLX5_ACTION_IN_FIELD_METADATA_REG_B
,
111 [NIC_ZONE_RESTORE_TO_REG
] = nic_zone_restore_to_reg_ct
,
114 /* To avoid false lock dependency warning set the tc_ht lock
115 * class different than the lock class of the ht being used when deleting
116 * last flow from a group and then deleting a group, we get into del_sw_flow_group()
117 * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but
118 * it's different than the ht->mutex here.
120 static struct lock_class_key tc_ht_lock_key
;
122 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow
*flow
);
125 mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec
*spec
,
126 enum mlx5e_tc_attr_to_reg type
,
130 void *headers_c
= spec
->match_criteria
, *headers_v
= spec
->match_value
, *fmask
, *fval
;
131 int soffset
= mlx5e_tc_attr_to_reg_mappings
[type
].soffset
;
132 int moffset
= mlx5e_tc_attr_to_reg_mappings
[type
].moffset
;
133 int match_len
= mlx5e_tc_attr_to_reg_mappings
[type
].mlen
;
134 u32 max_mask
= GENMASK(match_len
- 1, 0);
135 __be32 curr_mask_be
, curr_val_be
;
136 u32 curr_mask
, curr_val
;
138 fmask
= headers_c
+ soffset
;
139 fval
= headers_v
+ soffset
;
141 memcpy(&curr_mask_be
, fmask
, 4);
142 memcpy(&curr_val_be
, fval
, 4);
144 curr_mask
= be32_to_cpu(curr_mask_be
);
145 curr_val
= be32_to_cpu(curr_val_be
);
147 //move to correct offset
148 WARN_ON(mask
> max_mask
);
151 max_mask
<<= moffset
;
154 curr_mask
&= ~max_mask
;
155 curr_val
&= ~max_mask
;
157 //add current to mask
161 //back to be32 and write
162 curr_mask_be
= cpu_to_be32(curr_mask
);
163 curr_val_be
= cpu_to_be32(curr_val
);
165 memcpy(fmask
, &curr_mask_be
, 4);
166 memcpy(fval
, &curr_val_be
, 4);
168 spec
->match_criteria_enable
|= MLX5_MATCH_MISC_PARAMETERS_2
;
172 mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec
*spec
,
173 enum mlx5e_tc_attr_to_reg type
,
177 void *headers_c
= spec
->match_criteria
, *headers_v
= spec
->match_value
, *fmask
, *fval
;
178 int soffset
= mlx5e_tc_attr_to_reg_mappings
[type
].soffset
;
179 int moffset
= mlx5e_tc_attr_to_reg_mappings
[type
].moffset
;
180 int match_len
= mlx5e_tc_attr_to_reg_mappings
[type
].mlen
;
181 u32 max_mask
= GENMASK(match_len
- 1, 0);
182 __be32 curr_mask_be
, curr_val_be
;
183 u32 curr_mask
, curr_val
;
185 fmask
= headers_c
+ soffset
;
186 fval
= headers_v
+ soffset
;
188 memcpy(&curr_mask_be
, fmask
, 4);
189 memcpy(&curr_val_be
, fval
, 4);
191 curr_mask
= be32_to_cpu(curr_mask_be
);
192 curr_val
= be32_to_cpu(curr_val_be
);
194 *mask
= (curr_mask
>> moffset
) & max_mask
;
195 *val
= (curr_val
>> moffset
) & max_mask
;
199 mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev
*mdev
,
200 struct mlx5e_tc_mod_hdr_acts
*mod_hdr_acts
,
201 enum mlx5_flow_namespace_type ns
,
202 enum mlx5e_tc_attr_to_reg type
,
205 int moffset
= mlx5e_tc_attr_to_reg_mappings
[type
].moffset
;
206 int mfield
= mlx5e_tc_attr_to_reg_mappings
[type
].mfield
;
207 int mlen
= mlx5e_tc_attr_to_reg_mappings
[type
].mlen
;
211 err
= alloc_mod_hdr_actions(mdev
, ns
, mod_hdr_acts
);
215 modact
= mod_hdr_acts
->actions
+
216 (mod_hdr_acts
->num_actions
* MLX5_MH_ACT_SZ
);
218 /* Firmware has 5bit length field and 0 means 32bits */
222 MLX5_SET(set_action_in
, modact
, action_type
, MLX5_ACTION_TYPE_SET
);
223 MLX5_SET(set_action_in
, modact
, field
, mfield
);
224 MLX5_SET(set_action_in
, modact
, offset
, moffset
);
225 MLX5_SET(set_action_in
, modact
, length
, mlen
);
226 MLX5_SET(set_action_in
, modact
, data
, data
);
227 err
= mod_hdr_acts
->num_actions
;
228 mod_hdr_acts
->num_actions
++;
233 static struct mlx5_tc_ct_priv
*
234 get_ct_priv(struct mlx5e_priv
*priv
)
236 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
237 struct mlx5_rep_uplink_priv
*uplink_priv
;
238 struct mlx5e_rep_priv
*uplink_rpriv
;
240 if (is_mdev_switchdev_mode(priv
->mdev
)) {
241 uplink_rpriv
= mlx5_eswitch_get_uplink_priv(esw
, REP_ETH
);
242 uplink_priv
= &uplink_rpriv
->uplink_priv
;
244 return uplink_priv
->ct_priv
;
247 return priv
->fs
.tc
.ct
;
250 static struct mlx5e_tc_psample
*
251 get_sample_priv(struct mlx5e_priv
*priv
)
253 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
254 struct mlx5_rep_uplink_priv
*uplink_priv
;
255 struct mlx5e_rep_priv
*uplink_rpriv
;
257 if (is_mdev_switchdev_mode(priv
->mdev
)) {
258 uplink_rpriv
= mlx5_eswitch_get_uplink_priv(esw
, REP_ETH
);
259 uplink_priv
= &uplink_rpriv
->uplink_priv
;
261 return uplink_priv
->tc_psample
;
267 struct mlx5_flow_handle
*
268 mlx5_tc_rule_insert(struct mlx5e_priv
*priv
,
269 struct mlx5_flow_spec
*spec
,
270 struct mlx5_flow_attr
*attr
)
272 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
274 if (is_mdev_switchdev_mode(priv
->mdev
))
275 return mlx5_eswitch_add_offloaded_rule(esw
, spec
, attr
);
277 return mlx5e_add_offloaded_nic_rule(priv
, spec
, attr
);
281 mlx5_tc_rule_delete(struct mlx5e_priv
*priv
,
282 struct mlx5_flow_handle
*rule
,
283 struct mlx5_flow_attr
*attr
)
285 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
287 if (is_mdev_switchdev_mode(priv
->mdev
)) {
288 mlx5_eswitch_del_offloaded_rule(esw
, rule
, attr
);
293 mlx5e_del_offloaded_nic_rule(priv
, rule
, attr
);
297 mlx5e_tc_match_to_reg_set(struct mlx5_core_dev
*mdev
,
298 struct mlx5e_tc_mod_hdr_acts
*mod_hdr_acts
,
299 enum mlx5_flow_namespace_type ns
,
300 enum mlx5e_tc_attr_to_reg type
,
303 int ret
= mlx5e_tc_match_to_reg_set_and_get_id(mdev
, mod_hdr_acts
, ns
, type
, data
);
305 return ret
< 0 ? ret
: 0;
308 void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev
*mdev
,
309 struct mlx5e_tc_mod_hdr_acts
*mod_hdr_acts
,
310 enum mlx5e_tc_attr_to_reg type
,
311 int act_id
, u32 data
)
313 int moffset
= mlx5e_tc_attr_to_reg_mappings
[type
].moffset
;
314 int mfield
= mlx5e_tc_attr_to_reg_mappings
[type
].mfield
;
315 int mlen
= mlx5e_tc_attr_to_reg_mappings
[type
].mlen
;
318 modact
= mod_hdr_acts
->actions
+ (act_id
* MLX5_MH_ACT_SZ
);
320 /* Firmware has 5bit length field and 0 means 32bits */
324 MLX5_SET(set_action_in
, modact
, action_type
, MLX5_ACTION_TYPE_SET
);
325 MLX5_SET(set_action_in
, modact
, field
, mfield
);
326 MLX5_SET(set_action_in
, modact
, offset
, moffset
);
327 MLX5_SET(set_action_in
, modact
, length
, mlen
);
328 MLX5_SET(set_action_in
, modact
, data
, data
);
331 struct mlx5e_hairpin
{
332 struct mlx5_hairpin
*pair
;
334 struct mlx5_core_dev
*func_mdev
;
335 struct mlx5e_priv
*func_priv
;
337 struct mlx5e_tir direct_tir
;
340 struct mlx5e_rqt indir_rqt
;
341 struct mlx5e_tir indir_tir
[MLX5E_NUM_INDIR_TIRS
];
342 struct mlx5_ttc_table
*ttc
;
345 struct mlx5e_hairpin_entry
{
346 /* a node of a hash table which keeps all the hairpin entries */
347 struct hlist_node hairpin_hlist
;
349 /* protects flows list */
350 spinlock_t flows_lock
;
351 /* flows sharing the same hairpin */
352 struct list_head flows
;
353 /* hpe's that were not fully initialized when dead peer update event
354 * function traversed them.
356 struct list_head dead_peer_wait_list
;
360 struct mlx5e_hairpin
*hp
;
362 struct completion res_ready
;
365 static void mlx5e_tc_del_flow(struct mlx5e_priv
*priv
,
366 struct mlx5e_tc_flow
*flow
);
368 struct mlx5e_tc_flow
*mlx5e_flow_get(struct mlx5e_tc_flow
*flow
)
370 if (!flow
|| !refcount_inc_not_zero(&flow
->refcnt
))
371 return ERR_PTR(-EINVAL
);
375 void mlx5e_flow_put(struct mlx5e_priv
*priv
, struct mlx5e_tc_flow
*flow
)
377 if (refcount_dec_and_test(&flow
->refcnt
)) {
378 mlx5e_tc_del_flow(priv
, flow
);
379 kfree_rcu(flow
, rcu_head
);
383 bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow
*flow
)
385 return flow_flag_test(flow
, ESWITCH
);
388 static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow
*flow
)
390 return flow_flag_test(flow
, FT
);
393 bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow
*flow
)
395 return flow_flag_test(flow
, OFFLOADED
);
398 static int get_flow_name_space(struct mlx5e_tc_flow
*flow
)
400 return mlx5e_is_eswitch_flow(flow
) ?
401 MLX5_FLOW_NAMESPACE_FDB
: MLX5_FLOW_NAMESPACE_KERNEL
;
404 static struct mod_hdr_tbl
*
405 get_mod_hdr_table(struct mlx5e_priv
*priv
, struct mlx5e_tc_flow
*flow
)
407 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
409 return get_flow_name_space(flow
) == MLX5_FLOW_NAMESPACE_FDB
?
410 &esw
->offloads
.mod_hdr
:
411 &priv
->fs
.tc
.mod_hdr
;
414 static int mlx5e_attach_mod_hdr(struct mlx5e_priv
*priv
,
415 struct mlx5e_tc_flow
*flow
,
416 struct mlx5e_tc_flow_parse_attr
*parse_attr
)
418 struct mlx5_modify_hdr
*modify_hdr
;
419 struct mlx5e_mod_hdr_handle
*mh
;
421 mh
= mlx5e_mod_hdr_attach(priv
->mdev
, get_mod_hdr_table(priv
, flow
),
422 get_flow_name_space(flow
),
423 &parse_attr
->mod_hdr_acts
);
427 modify_hdr
= mlx5e_mod_hdr_get(mh
);
428 flow
->attr
->modify_hdr
= modify_hdr
;
434 static void mlx5e_detach_mod_hdr(struct mlx5e_priv
*priv
,
435 struct mlx5e_tc_flow
*flow
)
437 /* flow wasn't fully initialized */
441 mlx5e_mod_hdr_detach(priv
->mdev
, get_mod_hdr_table(priv
, flow
),
447 struct mlx5_core_dev
*mlx5e_hairpin_get_mdev(struct net
*net
, int ifindex
)
449 struct mlx5_core_dev
*mdev
;
450 struct net_device
*netdev
;
451 struct mlx5e_priv
*priv
;
453 netdev
= dev_get_by_index(net
, ifindex
);
455 return ERR_PTR(-ENODEV
);
457 priv
= netdev_priv(netdev
);
461 /* Mirred tc action holds a refcount on the ifindex net_device (see
462 * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
463 * after dev_put(netdev), while we're in the context of adding a tc flow.
465 * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
466 * stored in a hairpin object, which exists until all flows, that refer to it, get
469 * On the other hand, after a hairpin object has been created, the peer net_device may
470 * be removed/unbound while there are still some hairpin flows that are using it. This
471 * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
472 * NETDEV_UNREGISTER event of the peer net_device.
477 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin
*hp
)
479 struct mlx5e_tir_builder
*builder
;
482 builder
= mlx5e_tir_builder_alloc(false);
486 err
= mlx5_core_alloc_transport_domain(hp
->func_mdev
, &hp
->tdn
);
490 mlx5e_tir_builder_build_inline(builder
, hp
->tdn
, hp
->pair
->rqn
[0]);
491 err
= mlx5e_tir_init(&hp
->direct_tir
, builder
, hp
->func_mdev
, false);
496 mlx5e_tir_builder_free(builder
);
500 mlx5_core_dealloc_transport_domain(hp
->func_mdev
, hp
->tdn
);
505 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin
*hp
)
507 mlx5e_tir_destroy(&hp
->direct_tir
);
508 mlx5_core_dealloc_transport_domain(hp
->func_mdev
, hp
->tdn
);
511 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin
*hp
)
513 struct mlx5e_priv
*priv
= hp
->func_priv
;
514 struct mlx5_core_dev
*mdev
= priv
->mdev
;
515 struct mlx5e_rss_params_indir
*indir
;
518 indir
= kvmalloc(sizeof(*indir
), GFP_KERNEL
);
522 mlx5e_rss_params_indir_init_uniform(indir
, hp
->num_channels
);
523 err
= mlx5e_rqt_init_indir(&hp
->indir_rqt
, mdev
, hp
->pair
->rqn
, hp
->num_channels
,
524 mlx5e_rx_res_get_current_hash(priv
->rx_res
).hfunc
,
531 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin
*hp
)
533 struct mlx5e_priv
*priv
= hp
->func_priv
;
534 struct mlx5e_rss_params_hash rss_hash
;
535 enum mlx5_traffic_types tt
, max_tt
;
536 struct mlx5e_tir_builder
*builder
;
539 builder
= mlx5e_tir_builder_alloc(false);
543 rss_hash
= mlx5e_rx_res_get_current_hash(priv
->rx_res
);
545 for (tt
= 0; tt
< MLX5E_NUM_INDIR_TIRS
; tt
++) {
546 struct mlx5e_rss_params_traffic_type rss_tt
;
548 rss_tt
= mlx5e_rss_get_default_tt_config(tt
);
550 mlx5e_tir_builder_build_rqt(builder
, hp
->tdn
,
551 mlx5e_rqt_get_rqtn(&hp
->indir_rqt
),
553 mlx5e_tir_builder_build_rss(builder
, &rss_hash
, &rss_tt
, false);
555 err
= mlx5e_tir_init(&hp
->indir_tir
[tt
], builder
, hp
->func_mdev
, false);
557 mlx5_core_warn(hp
->func_mdev
, "create indirect tirs failed, %d\n", err
);
558 goto err_destroy_tirs
;
561 mlx5e_tir_builder_clear(builder
);
565 mlx5e_tir_builder_free(builder
);
570 for (tt
= 0; tt
< max_tt
; tt
++)
571 mlx5e_tir_destroy(&hp
->indir_tir
[tt
]);
576 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin
*hp
)
580 for (tt
= 0; tt
< MLX5E_NUM_INDIR_TIRS
; tt
++)
581 mlx5e_tir_destroy(&hp
->indir_tir
[tt
]);
584 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin
*hp
,
585 struct ttc_params
*ttc_params
)
587 struct mlx5_flow_table_attr
*ft_attr
= &ttc_params
->ft_attr
;
590 memset(ttc_params
, 0, sizeof(*ttc_params
));
592 ttc_params
->ns
= mlx5_get_flow_namespace(hp
->func_mdev
,
593 MLX5_FLOW_NAMESPACE_KERNEL
);
594 for (tt
= 0; tt
< MLX5_NUM_TT
; tt
++) {
595 ttc_params
->dests
[tt
].type
= MLX5_FLOW_DESTINATION_TYPE_TIR
;
596 ttc_params
->dests
[tt
].tir_num
=
598 mlx5e_tir_get_tirn(&hp
->direct_tir
) :
599 mlx5e_tir_get_tirn(&hp
->indir_tir
[tt
]);
602 ft_attr
->level
= MLX5E_TC_TTC_FT_LEVEL
;
603 ft_attr
->prio
= MLX5E_TC_PRIO
;
606 static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin
*hp
)
608 struct mlx5e_priv
*priv
= hp
->func_priv
;
609 struct ttc_params ttc_params
;
612 err
= mlx5e_hairpin_create_indirect_rqt(hp
);
616 err
= mlx5e_hairpin_create_indirect_tirs(hp
);
618 goto err_create_indirect_tirs
;
620 mlx5e_hairpin_set_ttc_params(hp
, &ttc_params
);
621 hp
->ttc
= mlx5_create_ttc_table(priv
->mdev
, &ttc_params
);
622 if (IS_ERR(hp
->ttc
)) {
623 err
= PTR_ERR(hp
->ttc
);
624 goto err_create_ttc_table
;
627 netdev_dbg(priv
->netdev
, "add hairpin: using %d channels rss ttc table id %x\n",
629 mlx5_get_ttc_flow_table(priv
->fs
.ttc
)->id
);
633 err_create_ttc_table
:
634 mlx5e_hairpin_destroy_indirect_tirs(hp
);
635 err_create_indirect_tirs
:
636 mlx5e_rqt_destroy(&hp
->indir_rqt
);
641 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin
*hp
)
643 mlx5_destroy_ttc_table(hp
->ttc
);
644 mlx5e_hairpin_destroy_indirect_tirs(hp
);
645 mlx5e_rqt_destroy(&hp
->indir_rqt
);
648 static struct mlx5e_hairpin
*
649 mlx5e_hairpin_create(struct mlx5e_priv
*priv
, struct mlx5_hairpin_params
*params
,
652 struct mlx5_core_dev
*func_mdev
, *peer_mdev
;
653 struct mlx5e_hairpin
*hp
;
654 struct mlx5_hairpin
*pair
;
657 hp
= kzalloc(sizeof(*hp
), GFP_KERNEL
);
659 return ERR_PTR(-ENOMEM
);
661 func_mdev
= priv
->mdev
;
662 peer_mdev
= mlx5e_hairpin_get_mdev(dev_net(priv
->netdev
), peer_ifindex
);
663 if (IS_ERR(peer_mdev
)) {
664 err
= PTR_ERR(peer_mdev
);
665 goto create_pair_err
;
668 pair
= mlx5_core_hairpin_create(func_mdev
, peer_mdev
, params
);
671 goto create_pair_err
;
674 hp
->func_mdev
= func_mdev
;
675 hp
->func_priv
= priv
;
676 hp
->num_channels
= params
->num_channels
;
678 err
= mlx5e_hairpin_create_transport(hp
);
680 goto create_transport_err
;
682 if (hp
->num_channels
> 1) {
683 err
= mlx5e_hairpin_rss_init(hp
);
691 mlx5e_hairpin_destroy_transport(hp
);
692 create_transport_err
:
693 mlx5_core_hairpin_destroy(hp
->pair
);
699 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin
*hp
)
701 if (hp
->num_channels
> 1)
702 mlx5e_hairpin_rss_cleanup(hp
);
703 mlx5e_hairpin_destroy_transport(hp
);
704 mlx5_core_hairpin_destroy(hp
->pair
);
708 static inline u32
hash_hairpin_info(u16 peer_vhca_id
, u8 prio
)
710 return (peer_vhca_id
<< 16 | prio
);
713 static struct mlx5e_hairpin_entry
*mlx5e_hairpin_get(struct mlx5e_priv
*priv
,
714 u16 peer_vhca_id
, u8 prio
)
716 struct mlx5e_hairpin_entry
*hpe
;
717 u32 hash_key
= hash_hairpin_info(peer_vhca_id
, prio
);
719 hash_for_each_possible(priv
->fs
.tc
.hairpin_tbl
, hpe
,
720 hairpin_hlist
, hash_key
) {
721 if (hpe
->peer_vhca_id
== peer_vhca_id
&& hpe
->prio
== prio
) {
722 refcount_inc(&hpe
->refcnt
);
730 static void mlx5e_hairpin_put(struct mlx5e_priv
*priv
,
731 struct mlx5e_hairpin_entry
*hpe
)
733 /* no more hairpin flows for us, release the hairpin pair */
734 if (!refcount_dec_and_mutex_lock(&hpe
->refcnt
, &priv
->fs
.tc
.hairpin_tbl_lock
))
736 hash_del(&hpe
->hairpin_hlist
);
737 mutex_unlock(&priv
->fs
.tc
.hairpin_tbl_lock
);
739 if (!IS_ERR_OR_NULL(hpe
->hp
)) {
740 netdev_dbg(priv
->netdev
, "del hairpin: peer %s\n",
741 dev_name(hpe
->hp
->pair
->peer_mdev
->device
));
743 mlx5e_hairpin_destroy(hpe
->hp
);
746 WARN_ON(!list_empty(&hpe
->flows
));
750 #define UNKNOWN_MATCH_PRIO 8
752 static int mlx5e_hairpin_get_prio(struct mlx5e_priv
*priv
,
753 struct mlx5_flow_spec
*spec
, u8
*match_prio
,
754 struct netlink_ext_ack
*extack
)
756 void *headers_c
, *headers_v
;
757 u8 prio_val
, prio_mask
= 0;
760 #ifdef CONFIG_MLX5_CORE_EN_DCB
761 if (priv
->dcbx_dp
.trust_state
!= MLX5_QPTS_TRUST_PCP
) {
762 NL_SET_ERR_MSG_MOD(extack
,
763 "only PCP trust state supported for hairpin");
767 headers_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
, outer_headers
);
768 headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
, outer_headers
);
770 vlan_present
= MLX5_GET(fte_match_set_lyr_2_4
, headers_v
, cvlan_tag
);
772 prio_mask
= MLX5_GET(fte_match_set_lyr_2_4
, headers_c
, first_prio
);
773 prio_val
= MLX5_GET(fte_match_set_lyr_2_4
, headers_v
, first_prio
);
776 if (!vlan_present
|| !prio_mask
) {
777 prio_val
= UNKNOWN_MATCH_PRIO
;
778 } else if (prio_mask
!= 0x7) {
779 NL_SET_ERR_MSG_MOD(extack
,
780 "masked priority match not supported for hairpin");
784 *match_prio
= prio_val
;
788 static int mlx5e_hairpin_flow_add(struct mlx5e_priv
*priv
,
789 struct mlx5e_tc_flow
*flow
,
790 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
791 struct netlink_ext_ack
*extack
)
793 int peer_ifindex
= parse_attr
->mirred_ifindex
[0];
794 struct mlx5_hairpin_params params
;
795 struct mlx5_core_dev
*peer_mdev
;
796 struct mlx5e_hairpin_entry
*hpe
;
797 struct mlx5e_hairpin
*hp
;
804 peer_mdev
= mlx5e_hairpin_get_mdev(dev_net(priv
->netdev
), peer_ifindex
);
805 if (IS_ERR(peer_mdev
)) {
806 NL_SET_ERR_MSG_MOD(extack
, "invalid ifindex of mirred device");
807 return PTR_ERR(peer_mdev
);
810 if (!MLX5_CAP_GEN(priv
->mdev
, hairpin
) || !MLX5_CAP_GEN(peer_mdev
, hairpin
)) {
811 NL_SET_ERR_MSG_MOD(extack
, "hairpin is not supported");
815 peer_id
= MLX5_CAP_GEN(peer_mdev
, vhca_id
);
816 err
= mlx5e_hairpin_get_prio(priv
, &parse_attr
->spec
, &match_prio
,
821 mutex_lock(&priv
->fs
.tc
.hairpin_tbl_lock
);
822 hpe
= mlx5e_hairpin_get(priv
, peer_id
, match_prio
);
824 mutex_unlock(&priv
->fs
.tc
.hairpin_tbl_lock
);
825 wait_for_completion(&hpe
->res_ready
);
827 if (IS_ERR(hpe
->hp
)) {
834 hpe
= kzalloc(sizeof(*hpe
), GFP_KERNEL
);
836 mutex_unlock(&priv
->fs
.tc
.hairpin_tbl_lock
);
840 spin_lock_init(&hpe
->flows_lock
);
841 INIT_LIST_HEAD(&hpe
->flows
);
842 INIT_LIST_HEAD(&hpe
->dead_peer_wait_list
);
843 hpe
->peer_vhca_id
= peer_id
;
844 hpe
->prio
= match_prio
;
845 refcount_set(&hpe
->refcnt
, 1);
846 init_completion(&hpe
->res_ready
);
848 hash_add(priv
->fs
.tc
.hairpin_tbl
, &hpe
->hairpin_hlist
,
849 hash_hairpin_info(peer_id
, match_prio
));
850 mutex_unlock(&priv
->fs
.tc
.hairpin_tbl_lock
);
852 params
.log_data_size
= 16;
853 params
.log_data_size
= min_t(u8
, params
.log_data_size
,
854 MLX5_CAP_GEN(priv
->mdev
, log_max_hairpin_wq_data_sz
));
855 params
.log_data_size
= max_t(u8
, params
.log_data_size
,
856 MLX5_CAP_GEN(priv
->mdev
, log_min_hairpin_wq_data_sz
));
858 params
.log_num_packets
= params
.log_data_size
-
859 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv
->mdev
);
860 params
.log_num_packets
= min_t(u8
, params
.log_num_packets
,
861 MLX5_CAP_GEN(priv
->mdev
, log_max_hairpin_num_packets
));
863 params
.q_counter
= priv
->q_counter
;
864 /* set hairpin pair per each 50Gbs share of the link */
865 mlx5e_port_max_linkspeed(priv
->mdev
, &link_speed
);
866 link_speed
= max_t(u32
, link_speed
, 50000);
867 link_speed64
= link_speed
;
868 do_div(link_speed64
, 50000);
869 params
.num_channels
= link_speed64
;
871 hp
= mlx5e_hairpin_create(priv
, ¶ms
, peer_ifindex
);
873 complete_all(&hpe
->res_ready
);
879 netdev_dbg(priv
->netdev
, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
880 mlx5e_tir_get_tirn(&hp
->direct_tir
), hp
->pair
->rqn
[0],
881 dev_name(hp
->pair
->peer_mdev
->device
),
882 hp
->pair
->sqn
[0], match_prio
, params
.log_data_size
, params
.log_num_packets
);
885 if (hpe
->hp
->num_channels
> 1) {
886 flow_flag_set(flow
, HAIRPIN_RSS
);
887 flow
->attr
->nic_attr
->hairpin_ft
=
888 mlx5_get_ttc_flow_table(hpe
->hp
->ttc
);
890 flow
->attr
->nic_attr
->hairpin_tirn
= mlx5e_tir_get_tirn(&hpe
->hp
->direct_tir
);
894 spin_lock(&hpe
->flows_lock
);
895 list_add(&flow
->hairpin
, &hpe
->flows
);
896 spin_unlock(&hpe
->flows_lock
);
901 mlx5e_hairpin_put(priv
, hpe
);
905 static void mlx5e_hairpin_flow_del(struct mlx5e_priv
*priv
,
906 struct mlx5e_tc_flow
*flow
)
908 /* flow wasn't fully initialized */
912 spin_lock(&flow
->hpe
->flows_lock
);
913 list_del(&flow
->hairpin
);
914 spin_unlock(&flow
->hpe
->flows_lock
);
916 mlx5e_hairpin_put(priv
, flow
->hpe
);
920 struct mlx5_flow_handle
*
921 mlx5e_add_offloaded_nic_rule(struct mlx5e_priv
*priv
,
922 struct mlx5_flow_spec
*spec
,
923 struct mlx5_flow_attr
*attr
)
925 struct mlx5_flow_context
*flow_context
= &spec
->flow_context
;
926 struct mlx5_fs_chains
*nic_chains
= nic_chains(priv
);
927 struct mlx5_nic_flow_attr
*nic_attr
= attr
->nic_attr
;
928 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
929 struct mlx5_flow_destination dest
[2] = {};
930 struct mlx5_flow_act flow_act
= {
931 .action
= attr
->action
,
932 .flags
= FLOW_ACT_NO_APPEND
,
934 struct mlx5_flow_handle
*rule
;
935 struct mlx5_flow_table
*ft
;
938 flow_context
->flags
|= FLOW_CONTEXT_HAS_TAG
;
939 flow_context
->flow_tag
= nic_attr
->flow_tag
;
942 dest
[dest_ix
].type
= MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE
;
943 dest
[dest_ix
].ft
= attr
->dest_ft
;
945 } else if (nic_attr
->hairpin_ft
) {
946 dest
[dest_ix
].type
= MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE
;
947 dest
[dest_ix
].ft
= nic_attr
->hairpin_ft
;
949 } else if (nic_attr
->hairpin_tirn
) {
950 dest
[dest_ix
].type
= MLX5_FLOW_DESTINATION_TYPE_TIR
;
951 dest
[dest_ix
].tir_num
= nic_attr
->hairpin_tirn
;
953 } else if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
) {
954 dest
[dest_ix
].type
= MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE
;
955 if (attr
->dest_chain
) {
956 dest
[dest_ix
].ft
= mlx5_chains_get_table(nic_chains
,
959 if (IS_ERR(dest
[dest_ix
].ft
))
960 return ERR_CAST(dest
[dest_ix
].ft
);
962 dest
[dest_ix
].ft
= mlx5e_vlan_get_flowtable(priv
->fs
.vlan
);
967 if (dest
[0].type
== MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE
&&
968 MLX5_CAP_FLOWTABLE_NIC_RX(priv
->mdev
, ignore_flow_level
))
969 flow_act
.flags
|= FLOW_ACT_IGNORE_FLOW_LEVEL
;
971 if (flow_act
.action
& MLX5_FLOW_CONTEXT_ACTION_COUNT
) {
972 dest
[dest_ix
].type
= MLX5_FLOW_DESTINATION_TYPE_COUNTER
;
973 dest
[dest_ix
].counter_id
= mlx5_fc_id(attr
->counter
);
977 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)
978 flow_act
.modify_hdr
= attr
->modify_hdr
;
980 mutex_lock(&tc
->t_lock
);
981 if (IS_ERR_OR_NULL(tc
->t
)) {
982 /* Create the root table here if doesn't exist yet */
984 mlx5_chains_get_table(nic_chains
, 0, 1, MLX5E_TC_FT_LEVEL
);
987 mutex_unlock(&tc
->t_lock
);
988 netdev_err(priv
->netdev
,
989 "Failed to create tc offload table\n");
990 rule
= ERR_CAST(priv
->fs
.tc
.t
);
994 mutex_unlock(&tc
->t_lock
);
996 if (attr
->chain
|| attr
->prio
)
997 ft
= mlx5_chains_get_table(nic_chains
,
998 attr
->chain
, attr
->prio
,
1004 rule
= ERR_CAST(ft
);
1008 if (attr
->outer_match_level
!= MLX5_MATCH_NONE
)
1009 spec
->match_criteria_enable
|= MLX5_MATCH_OUTER_HEADERS
;
1011 rule
= mlx5_add_flow_rules(ft
, spec
,
1012 &flow_act
, dest
, dest_ix
);
1019 if (attr
->chain
|| attr
->prio
)
1020 mlx5_chains_put_table(nic_chains
,
1021 attr
->chain
, attr
->prio
,
1024 if (attr
->dest_chain
)
1025 mlx5_chains_put_table(nic_chains
,
1026 attr
->dest_chain
, 1,
1029 return ERR_CAST(rule
);
1033 mlx5e_tc_add_nic_flow(struct mlx5e_priv
*priv
,
1034 struct mlx5e_tc_flow
*flow
,
1035 struct netlink_ext_ack
*extack
)
1037 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
1038 struct mlx5_flow_attr
*attr
= flow
->attr
;
1039 struct mlx5_core_dev
*dev
= priv
->mdev
;
1040 struct mlx5_fc
*counter
;
1043 parse_attr
= attr
->parse_attr
;
1045 if (flow_flag_test(flow
, HAIRPIN
)) {
1046 err
= mlx5e_hairpin_flow_add(priv
, flow
, parse_attr
, extack
);
1051 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_COUNT
) {
1052 counter
= mlx5_fc_create(dev
, true);
1053 if (IS_ERR(counter
))
1054 return PTR_ERR(counter
);
1056 attr
->counter
= counter
;
1059 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
) {
1060 err
= mlx5e_attach_mod_hdr(priv
, flow
, parse_attr
);
1061 dealloc_mod_hdr_actions(&parse_attr
->mod_hdr_acts
);
1066 if (flow_flag_test(flow
, CT
))
1067 flow
->rule
[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv
), flow
, &parse_attr
->spec
,
1068 attr
, &parse_attr
->mod_hdr_acts
);
1070 flow
->rule
[0] = mlx5e_add_offloaded_nic_rule(priv
, &parse_attr
->spec
,
1073 return PTR_ERR_OR_ZERO(flow
->rule
[0]);
1076 void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv
*priv
,
1077 struct mlx5_flow_handle
*rule
,
1078 struct mlx5_flow_attr
*attr
)
1080 struct mlx5_fs_chains
*nic_chains
= nic_chains(priv
);
1082 mlx5_del_flow_rules(rule
);
1084 if (attr
->chain
|| attr
->prio
)
1085 mlx5_chains_put_table(nic_chains
, attr
->chain
, attr
->prio
,
1088 if (attr
->dest_chain
)
1089 mlx5_chains_put_table(nic_chains
, attr
->dest_chain
, 1,
1093 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv
*priv
,
1094 struct mlx5e_tc_flow
*flow
)
1096 struct mlx5_flow_attr
*attr
= flow
->attr
;
1097 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
1099 flow_flag_clear(flow
, OFFLOADED
);
1101 if (flow_flag_test(flow
, CT
))
1102 mlx5_tc_ct_delete_flow(get_ct_priv(flow
->priv
), flow
, attr
);
1103 else if (!IS_ERR_OR_NULL(flow
->rule
[0]))
1104 mlx5e_del_offloaded_nic_rule(priv
, flow
->rule
[0], attr
);
1106 /* Remove root table if no rules are left to avoid
1107 * extra steering hops.
1109 mutex_lock(&priv
->fs
.tc
.t_lock
);
1110 if (!mlx5e_tc_num_filters(priv
, MLX5_TC_FLAG(NIC_OFFLOAD
)) &&
1111 !IS_ERR_OR_NULL(tc
->t
)) {
1112 mlx5_chains_put_table(nic_chains(priv
), 0, 1, MLX5E_TC_FT_LEVEL
);
1113 priv
->fs
.tc
.t
= NULL
;
1115 mutex_unlock(&priv
->fs
.tc
.t_lock
);
1117 kvfree(attr
->parse_attr
);
1119 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)
1120 mlx5e_detach_mod_hdr(priv
, flow
);
1122 mlx5_fc_destroy(priv
->mdev
, attr
->counter
);
1124 if (flow_flag_test(flow
, HAIRPIN
))
1125 mlx5e_hairpin_flow_del(priv
, flow
);
1130 struct mlx5_flow_handle
*
1131 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch
*esw
,
1132 struct mlx5e_tc_flow
*flow
,
1133 struct mlx5_flow_spec
*spec
,
1134 struct mlx5_flow_attr
*attr
)
1136 struct mlx5e_tc_mod_hdr_acts
*mod_hdr_acts
;
1137 struct mlx5_flow_handle
*rule
;
1139 if (attr
->flags
& MLX5_ESW_ATTR_FLAG_SLOW_PATH
)
1140 return mlx5_eswitch_add_offloaded_rule(esw
, spec
, attr
);
1142 if (flow_flag_test(flow
, CT
)) {
1143 mod_hdr_acts
= &attr
->parse_attr
->mod_hdr_acts
;
1145 rule
= mlx5_tc_ct_flow_offload(get_ct_priv(flow
->priv
),
1148 } else if (flow_flag_test(flow
, SAMPLE
)) {
1149 rule
= mlx5e_tc_sample_offload(get_sample_priv(flow
->priv
), spec
, attr
,
1150 mlx5e_tc_get_flow_tun_id(flow
));
1152 rule
= mlx5_eswitch_add_offloaded_rule(esw
, spec
, attr
);
1158 if (attr
->esw_attr
->split_count
) {
1159 flow
->rule
[1] = mlx5_eswitch_add_fwd_rule(esw
, spec
, attr
);
1160 if (IS_ERR(flow
->rule
[1])) {
1161 if (flow_flag_test(flow
, CT
))
1162 mlx5_tc_ct_delete_flow(get_ct_priv(flow
->priv
), flow
, attr
);
1164 mlx5_eswitch_del_offloaded_rule(esw
, rule
, attr
);
1165 return flow
->rule
[1];
1172 void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch
*esw
,
1173 struct mlx5e_tc_flow
*flow
,
1174 struct mlx5_flow_attr
*attr
)
1176 flow_flag_clear(flow
, OFFLOADED
);
1178 if (attr
->flags
& MLX5_ESW_ATTR_FLAG_SLOW_PATH
)
1179 goto offload_rule_0
;
1181 if (attr
->esw_attr
->split_count
)
1182 mlx5_eswitch_del_fwd_rule(esw
, flow
->rule
[1], attr
);
1184 if (flow_flag_test(flow
, CT
))
1185 mlx5_tc_ct_delete_flow(get_ct_priv(flow
->priv
), flow
, attr
);
1186 else if (flow_flag_test(flow
, SAMPLE
))
1187 mlx5e_tc_sample_unoffload(get_sample_priv(flow
->priv
), flow
->rule
[0], attr
);
1190 mlx5_eswitch_del_offloaded_rule(esw
, flow
->rule
[0], attr
);
1193 struct mlx5_flow_handle
*
1194 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch
*esw
,
1195 struct mlx5e_tc_flow
*flow
,
1196 struct mlx5_flow_spec
*spec
)
1198 struct mlx5_flow_attr
*slow_attr
;
1199 struct mlx5_flow_handle
*rule
;
1201 slow_attr
= mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB
);
1203 return ERR_PTR(-ENOMEM
);
1205 memcpy(slow_attr
, flow
->attr
, ESW_FLOW_ATTR_SZ
);
1206 slow_attr
->action
= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
1207 slow_attr
->esw_attr
->split_count
= 0;
1208 slow_attr
->flags
|= MLX5_ESW_ATTR_FLAG_SLOW_PATH
;
1210 rule
= mlx5e_tc_offload_fdb_rules(esw
, flow
, spec
, slow_attr
);
1212 flow_flag_set(flow
, SLOW
);
1219 void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch
*esw
,
1220 struct mlx5e_tc_flow
*flow
)
1222 struct mlx5_flow_attr
*slow_attr
;
1224 slow_attr
= mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB
);
1226 mlx5_core_warn(flow
->priv
->mdev
, "Unable to alloc attr to unoffload slow path rule\n");
1230 memcpy(slow_attr
, flow
->attr
, ESW_FLOW_ATTR_SZ
);
1231 slow_attr
->action
= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
1232 slow_attr
->esw_attr
->split_count
= 0;
1233 slow_attr
->flags
|= MLX5_ESW_ATTR_FLAG_SLOW_PATH
;
1234 mlx5e_tc_unoffload_fdb_rules(esw
, flow
, slow_attr
);
1235 flow_flag_clear(flow
, SLOW
);
1239 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1242 static void unready_flow_add(struct mlx5e_tc_flow
*flow
,
1243 struct list_head
*unready_flows
)
1245 flow_flag_set(flow
, NOT_READY
);
1246 list_add_tail(&flow
->unready
, unready_flows
);
1249 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1252 static void unready_flow_del(struct mlx5e_tc_flow
*flow
)
1254 list_del(&flow
->unready
);
1255 flow_flag_clear(flow
, NOT_READY
);
1258 static void add_unready_flow(struct mlx5e_tc_flow
*flow
)
1260 struct mlx5_rep_uplink_priv
*uplink_priv
;
1261 struct mlx5e_rep_priv
*rpriv
;
1262 struct mlx5_eswitch
*esw
;
1264 esw
= flow
->priv
->mdev
->priv
.eswitch
;
1265 rpriv
= mlx5_eswitch_get_uplink_priv(esw
, REP_ETH
);
1266 uplink_priv
= &rpriv
->uplink_priv
;
1268 mutex_lock(&uplink_priv
->unready_flows_lock
);
1269 unready_flow_add(flow
, &uplink_priv
->unready_flows
);
1270 mutex_unlock(&uplink_priv
->unready_flows_lock
);
1273 static void remove_unready_flow(struct mlx5e_tc_flow
*flow
)
1275 struct mlx5_rep_uplink_priv
*uplink_priv
;
1276 struct mlx5e_rep_priv
*rpriv
;
1277 struct mlx5_eswitch
*esw
;
1279 esw
= flow
->priv
->mdev
->priv
.eswitch
;
1280 rpriv
= mlx5_eswitch_get_uplink_priv(esw
, REP_ETH
);
1281 uplink_priv
= &rpriv
->uplink_priv
;
1283 mutex_lock(&uplink_priv
->unready_flows_lock
);
1284 unready_flow_del(flow
);
1285 mutex_unlock(&uplink_priv
->unready_flows_lock
);
1288 static bool same_hw_devs(struct mlx5e_priv
*priv
, struct mlx5e_priv
*peer_priv
);
1290 bool mlx5e_tc_is_vf_tunnel(struct net_device
*out_dev
, struct net_device
*route_dev
)
1292 struct mlx5_core_dev
*out_mdev
, *route_mdev
;
1293 struct mlx5e_priv
*out_priv
, *route_priv
;
1295 out_priv
= netdev_priv(out_dev
);
1296 out_mdev
= out_priv
->mdev
;
1297 route_priv
= netdev_priv(route_dev
);
1298 route_mdev
= route_priv
->mdev
;
1300 if (out_mdev
->coredev_type
!= MLX5_COREDEV_PF
||
1301 route_mdev
->coredev_type
!= MLX5_COREDEV_VF
)
1304 return same_hw_devs(out_priv
, route_priv
);
1307 int mlx5e_tc_query_route_vport(struct net_device
*out_dev
, struct net_device
*route_dev
, u16
*vport
)
1309 struct mlx5e_priv
*out_priv
, *route_priv
;
1310 struct mlx5_devcom
*devcom
= NULL
;
1311 struct mlx5_core_dev
*route_mdev
;
1312 struct mlx5_eswitch
*esw
;
1316 out_priv
= netdev_priv(out_dev
);
1317 esw
= out_priv
->mdev
->priv
.eswitch
;
1318 route_priv
= netdev_priv(route_dev
);
1319 route_mdev
= route_priv
->mdev
;
1321 vhca_id
= MLX5_CAP_GEN(route_mdev
, vhca_id
);
1322 if (mlx5_lag_is_active(out_priv
->mdev
)) {
1323 /* In lag case we may get devices from different eswitch instances.
1324 * If we failed to get vport num, it means, mostly, that we on the wrong
1327 err
= mlx5_eswitch_vhca_id_to_vport(esw
, vhca_id
, vport
);
1331 devcom
= out_priv
->mdev
->priv
.devcom
;
1332 esw
= mlx5_devcom_get_peer_data(devcom
, MLX5_DEVCOM_ESW_OFFLOADS
);
1337 err
= mlx5_eswitch_vhca_id_to_vport(esw
, vhca_id
, vport
);
1339 mlx5_devcom_release_peer_data(devcom
, MLX5_DEVCOM_ESW_OFFLOADS
);
1343 int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv
*priv
,
1344 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
1345 struct mlx5e_tc_flow
*flow
)
1347 struct mlx5e_tc_mod_hdr_acts
*mod_hdr_acts
= &parse_attr
->mod_hdr_acts
;
1348 struct mlx5_modify_hdr
*mod_hdr
;
1350 mod_hdr
= mlx5_modify_header_alloc(priv
->mdev
,
1351 get_flow_name_space(flow
),
1352 mod_hdr_acts
->num_actions
,
1353 mod_hdr_acts
->actions
);
1354 if (IS_ERR(mod_hdr
))
1355 return PTR_ERR(mod_hdr
);
1357 WARN_ON(flow
->attr
->modify_hdr
);
1358 flow
->attr
->modify_hdr
= mod_hdr
;
1364 mlx5e_tc_add_fdb_flow(struct mlx5e_priv
*priv
,
1365 struct mlx5e_tc_flow
*flow
,
1366 struct netlink_ext_ack
*extack
)
1368 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1369 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
1370 struct mlx5_flow_attr
*attr
= flow
->attr
;
1371 bool vf_tun
= false, encap_valid
= true;
1372 struct net_device
*encap_dev
= NULL
;
1373 struct mlx5_esw_flow_attr
*esw_attr
;
1374 struct mlx5e_rep_priv
*rpriv
;
1375 struct mlx5e_priv
*out_priv
;
1376 struct mlx5_fc
*counter
;
1377 u32 max_prio
, max_chain
;
1381 /* We check chain range only for tc flows.
1382 * For ft flows, we checked attr->chain was originally 0 and set it to
1383 * FDB_FT_CHAIN which is outside tc range.
1384 * See mlx5e_rep_setup_ft_cb().
1386 max_chain
= mlx5_chains_get_chain_range(esw_chains(esw
));
1387 if (!mlx5e_is_ft_flow(flow
) && attr
->chain
> max_chain
) {
1388 NL_SET_ERR_MSG_MOD(extack
,
1389 "Requested chain is out of supported range");
1394 max_prio
= mlx5_chains_get_prio_range(esw_chains(esw
));
1395 if (attr
->prio
> max_prio
) {
1396 NL_SET_ERR_MSG_MOD(extack
,
1397 "Requested priority is out of supported range");
1402 if (flow_flag_test(flow
, TUN_RX
)) {
1403 err
= mlx5e_attach_decap_route(priv
, flow
);
1408 if (flow_flag_test(flow
, L3_TO_L2_DECAP
)) {
1409 err
= mlx5e_attach_decap(priv
, flow
, extack
);
1414 parse_attr
= attr
->parse_attr
;
1415 esw_attr
= attr
->esw_attr
;
1417 for (out_index
= 0; out_index
< MLX5_MAX_FLOW_FWD_VPORTS
; out_index
++) {
1418 struct net_device
*out_dev
;
1421 if (!(esw_attr
->dests
[out_index
].flags
& MLX5_ESW_DEST_ENCAP
))
1424 mirred_ifindex
= parse_attr
->mirred_ifindex
[out_index
];
1425 out_dev
= dev_get_by_index(dev_net(priv
->netdev
), mirred_ifindex
);
1427 NL_SET_ERR_MSG_MOD(extack
, "Requested mirred device not found");
1431 err
= mlx5e_attach_encap(priv
, flow
, out_dev
, out_index
,
1432 extack
, &encap_dev
, &encap_valid
);
1437 if (esw_attr
->dests
[out_index
].flags
&
1438 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE
)
1440 out_priv
= netdev_priv(encap_dev
);
1441 rpriv
= out_priv
->ppriv
;
1442 esw_attr
->dests
[out_index
].rep
= rpriv
->rep
;
1443 esw_attr
->dests
[out_index
].mdev
= out_priv
->mdev
;
1446 if (vf_tun
&& esw_attr
->out_count
> 1) {
1447 NL_SET_ERR_MSG_MOD(extack
, "VF tunnel encap with mirroring is not supported");
1452 err
= mlx5_eswitch_add_vlan_action(esw
, attr
);
1456 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
&&
1457 !(attr
->ct_attr
.ct_action
& TCA_CT_ACT_CLEAR
)) {
1459 err
= mlx5e_tc_add_flow_mod_hdr(priv
, parse_attr
, flow
);
1463 err
= mlx5e_attach_mod_hdr(priv
, flow
, parse_attr
);
1469 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_COUNT
) {
1470 counter
= mlx5_fc_create(esw_attr
->counter_dev
, true);
1471 if (IS_ERR(counter
)) {
1472 err
= PTR_ERR(counter
);
1476 attr
->counter
= counter
;
1479 /* we get here if one of the following takes place:
1480 * (1) there's no error
1481 * (2) there's an encap action and we don't have valid neigh
1484 flow
->rule
[0] = mlx5e_tc_offload_to_slow_path(esw
, flow
, &parse_attr
->spec
);
1486 flow
->rule
[0] = mlx5e_tc_offload_fdb_rules(esw
, flow
, &parse_attr
->spec
, attr
);
1488 if (IS_ERR(flow
->rule
[0])) {
1489 err
= PTR_ERR(flow
->rule
[0]);
1492 flow_flag_set(flow
, OFFLOADED
);
1497 flow_flag_set(flow
, FAILED
);
1501 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow
*flow
)
1503 struct mlx5_flow_spec
*spec
= &flow
->attr
->parse_attr
->spec
;
1504 void *headers_v
= MLX5_ADDR_OF(fte_match_param
,
1507 u32 geneve_tlv_opt_0_data
= MLX5_GET(fte_match_set_misc3
,
1509 geneve_tlv_option_0_data
);
1511 return !!geneve_tlv_opt_0_data
;
1514 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv
*priv
,
1515 struct mlx5e_tc_flow
*flow
)
1517 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1518 struct mlx5_flow_attr
*attr
= flow
->attr
;
1519 struct mlx5_esw_flow_attr
*esw_attr
;
1520 bool vf_tun
= false;
1523 esw_attr
= attr
->esw_attr
;
1524 mlx5e_put_flow_tunnel_id(flow
);
1526 if (flow_flag_test(flow
, NOT_READY
))
1527 remove_unready_flow(flow
);
1529 if (mlx5e_is_offloaded_flow(flow
)) {
1530 if (flow_flag_test(flow
, SLOW
))
1531 mlx5e_tc_unoffload_from_slow_path(esw
, flow
);
1533 mlx5e_tc_unoffload_fdb_rules(esw
, flow
, attr
);
1535 complete_all(&flow
->del_hw_done
);
1537 if (mlx5_flow_has_geneve_opt(flow
))
1538 mlx5_geneve_tlv_option_del(priv
->mdev
->geneve
);
1540 mlx5_eswitch_del_vlan_action(esw
, attr
);
1542 if (flow
->decap_route
)
1543 mlx5e_detach_decap_route(priv
, flow
);
1545 for (out_index
= 0; out_index
< MLX5_MAX_FLOW_FWD_VPORTS
; out_index
++) {
1546 if (esw_attr
->dests
[out_index
].flags
&
1547 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE
)
1549 if (esw_attr
->dests
[out_index
].flags
& MLX5_ESW_DEST_ENCAP
) {
1550 mlx5e_detach_encap(priv
, flow
, out_index
);
1551 kfree(attr
->parse_attr
->tun_info
[out_index
]);
1555 mlx5_tc_ct_match_del(get_ct_priv(priv
), &flow
->attr
->ct_attr
);
1557 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
) {
1558 dealloc_mod_hdr_actions(&attr
->parse_attr
->mod_hdr_acts
);
1559 if (vf_tun
&& attr
->modify_hdr
)
1560 mlx5_modify_header_dealloc(priv
->mdev
, attr
->modify_hdr
);
1562 mlx5e_detach_mod_hdr(priv
, flow
);
1564 kfree(attr
->sample_attr
);
1565 kvfree(attr
->parse_attr
);
1566 kvfree(attr
->esw_attr
->rx_tun_attr
);
1568 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_COUNT
)
1569 mlx5_fc_destroy(esw_attr
->counter_dev
, attr
->counter
);
1571 if (flow_flag_test(flow
, L3_TO_L2_DECAP
))
1572 mlx5e_detach_decap(priv
, flow
);
1577 struct mlx5_fc
*mlx5e_tc_get_counter(struct mlx5e_tc_flow
*flow
)
1579 return flow
->attr
->counter
;
1582 /* Iterate over tmp_list of flows attached to flow_list head. */
1583 void mlx5e_put_flow_list(struct mlx5e_priv
*priv
, struct list_head
*flow_list
)
1585 struct mlx5e_tc_flow
*flow
, *tmp
;
1587 list_for_each_entry_safe(flow
, tmp
, flow_list
, tmp_list
)
1588 mlx5e_flow_put(priv
, flow
);
1591 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow
*flow
)
1593 struct mlx5_eswitch
*esw
= flow
->priv
->mdev
->priv
.eswitch
;
1595 if (!flow_flag_test(flow
, ESWITCH
) ||
1596 !flow_flag_test(flow
, DUP
))
1599 mutex_lock(&esw
->offloads
.peer_mutex
);
1600 list_del(&flow
->peer
);
1601 mutex_unlock(&esw
->offloads
.peer_mutex
);
1603 flow_flag_clear(flow
, DUP
);
1605 if (refcount_dec_and_test(&flow
->peer_flow
->refcnt
)) {
1606 mlx5e_tc_del_fdb_flow(flow
->peer_flow
->priv
, flow
->peer_flow
);
1607 kfree(flow
->peer_flow
);
1610 flow
->peer_flow
= NULL
;
1613 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow
*flow
)
1615 struct mlx5_core_dev
*dev
= flow
->priv
->mdev
;
1616 struct mlx5_devcom
*devcom
= dev
->priv
.devcom
;
1617 struct mlx5_eswitch
*peer_esw
;
1619 peer_esw
= mlx5_devcom_get_peer_data(devcom
, MLX5_DEVCOM_ESW_OFFLOADS
);
1623 __mlx5e_tc_del_fdb_peer_flow(flow
);
1624 mlx5_devcom_release_peer_data(devcom
, MLX5_DEVCOM_ESW_OFFLOADS
);
/* Dispatch flow teardown to the FDB (eswitch) or NIC path. Eswitch
 * flows also drop their peer duplicate first.
 */
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	if (mlx5e_is_eswitch_flow(flow)) {
		mlx5e_tc_del_fdb_peer_flow(flow);
		mlx5e_tc_del_fdb_flow(priv, flow);
	} else {
		mlx5e_tc_del_nic_flow(priv, flow);
	}
}
1638 static bool flow_requires_tunnel_mapping(u32 chain
, struct flow_cls_offload
*f
)
1640 struct flow_rule
*rule
= flow_cls_offload_flow_rule(f
);
1641 struct flow_action
*flow_action
= &rule
->action
;
1642 const struct flow_action_entry
*act
;
1648 flow_action_for_each(i
, act
, flow_action
) {
1650 case FLOW_ACTION_GOTO
:
1652 case FLOW_ACTION_SAMPLE
:
1663 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv
*priv
,
1664 struct flow_dissector_key_enc_opts
*opts
,
1665 struct netlink_ext_ack
*extack
,
1668 struct geneve_opt
*opt
;
1673 while (opts
->len
> off
) {
1674 opt
= (struct geneve_opt
*)&opts
->data
[off
];
1676 if (!(*dont_care
) || opt
->opt_class
|| opt
->type
||
1677 memchr_inv(opt
->opt_data
, 0, opt
->length
* 4)) {
1680 if (opt
->opt_class
!= htons(U16_MAX
) ||
1681 opt
->type
!= U8_MAX
) {
1682 NL_SET_ERR_MSG(extack
,
1683 "Partial match of tunnel options in chain > 0 isn't supported");
1684 netdev_warn(priv
->netdev
,
1685 "Partial match of tunnel options in chain > 0 isn't supported");
1690 off
+= sizeof(struct geneve_opt
) + opt
->length
* 4;
/* Copy the dissector key @diss_key of @rule into @dst.
 * NOTE(review): memcpy wrapper restored from the visible
 * skb_flow_dissector_target() fragment — verify against upstream.
 */
#define COPY_DISSECTOR(rule, diss_key, dst)\
({ \
	struct flow_rule *__rule = (rule);\
	typeof(dst) __dst = dst;\
\
	memcpy(__dst,\
	       skb_flow_dissector_target(__rule->match.dissector,\
					 diss_key,\
					 __rule->match.key),\
	       sizeof(*__dst));\
})
1708 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv
*priv
,
1709 struct mlx5e_tc_flow
*flow
,
1710 struct flow_cls_offload
*f
,
1711 struct net_device
*filter_dev
)
1713 struct flow_rule
*rule
= flow_cls_offload_flow_rule(f
);
1714 struct netlink_ext_ack
*extack
= f
->common
.extack
;
1715 struct mlx5e_tc_mod_hdr_acts
*mod_hdr_acts
;
1716 struct flow_match_enc_opts enc_opts_match
;
1717 struct tunnel_match_enc_opts tun_enc_opts
;
1718 struct mlx5_rep_uplink_priv
*uplink_priv
;
1719 struct mlx5_flow_attr
*attr
= flow
->attr
;
1720 struct mlx5e_rep_priv
*uplink_rpriv
;
1721 struct tunnel_match_key tunnel_key
;
1722 bool enc_opts_is_dont_care
= true;
1723 u32 tun_id
, enc_opts_id
= 0;
1724 struct mlx5_eswitch
*esw
;
1728 esw
= priv
->mdev
->priv
.eswitch
;
1729 uplink_rpriv
= mlx5_eswitch_get_uplink_priv(esw
, REP_ETH
);
1730 uplink_priv
= &uplink_rpriv
->uplink_priv
;
1732 memset(&tunnel_key
, 0, sizeof(tunnel_key
));
1733 COPY_DISSECTOR(rule
, FLOW_DISSECTOR_KEY_ENC_CONTROL
,
1734 &tunnel_key
.enc_control
);
1735 if (tunnel_key
.enc_control
.addr_type
== FLOW_DISSECTOR_KEY_IPV4_ADDRS
)
1736 COPY_DISSECTOR(rule
, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS
,
1737 &tunnel_key
.enc_ipv4
);
1739 COPY_DISSECTOR(rule
, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS
,
1740 &tunnel_key
.enc_ipv6
);
1741 COPY_DISSECTOR(rule
, FLOW_DISSECTOR_KEY_ENC_IP
, &tunnel_key
.enc_ip
);
1742 COPY_DISSECTOR(rule
, FLOW_DISSECTOR_KEY_ENC_PORTS
,
1743 &tunnel_key
.enc_tp
);
1744 COPY_DISSECTOR(rule
, FLOW_DISSECTOR_KEY_ENC_KEYID
,
1745 &tunnel_key
.enc_key_id
);
1746 tunnel_key
.filter_ifindex
= filter_dev
->ifindex
;
1748 err
= mapping_add(uplink_priv
->tunnel_mapping
, &tunnel_key
, &tun_id
);
1752 flow_rule_match_enc_opts(rule
, &enc_opts_match
);
1753 err
= enc_opts_is_dont_care_or_full_match(priv
,
1754 enc_opts_match
.mask
,
1756 &enc_opts_is_dont_care
);
1760 if (!enc_opts_is_dont_care
) {
1761 memset(&tun_enc_opts
, 0, sizeof(tun_enc_opts
));
1762 memcpy(&tun_enc_opts
.key
, enc_opts_match
.key
,
1763 sizeof(*enc_opts_match
.key
));
1764 memcpy(&tun_enc_opts
.mask
, enc_opts_match
.mask
,
1765 sizeof(*enc_opts_match
.mask
));
1767 err
= mapping_add(uplink_priv
->tunnel_enc_opts_mapping
,
1768 &tun_enc_opts
, &enc_opts_id
);
1773 value
= tun_id
<< ENC_OPTS_BITS
| enc_opts_id
;
1774 mask
= enc_opts_id
? TUNNEL_ID_MASK
:
1775 (TUNNEL_ID_MASK
& ~ENC_OPTS_BITS_MASK
);
1778 mlx5e_tc_match_to_reg_match(&attr
->parse_attr
->spec
,
1779 TUNNEL_TO_REG
, value
, mask
);
1781 mod_hdr_acts
= &attr
->parse_attr
->mod_hdr_acts
;
1782 err
= mlx5e_tc_match_to_reg_set(priv
->mdev
,
1783 mod_hdr_acts
, MLX5_FLOW_NAMESPACE_FDB
,
1784 TUNNEL_TO_REG
, value
);
1788 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
;
1791 flow
->tunnel_id
= value
;
1796 mapping_remove(uplink_priv
->tunnel_enc_opts_mapping
,
1799 mapping_remove(uplink_priv
->tunnel_mapping
, tun_id
);
1803 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow
*flow
)
1805 u32 enc_opts_id
= flow
->tunnel_id
& ENC_OPTS_BITS_MASK
;
1806 u32 tun_id
= flow
->tunnel_id
>> ENC_OPTS_BITS
;
1807 struct mlx5_rep_uplink_priv
*uplink_priv
;
1808 struct mlx5e_rep_priv
*uplink_rpriv
;
1809 struct mlx5_eswitch
*esw
;
1811 esw
= flow
->priv
->mdev
->priv
.eswitch
;
1812 uplink_rpriv
= mlx5_eswitch_get_uplink_priv(esw
, REP_ETH
);
1813 uplink_priv
= &uplink_rpriv
->uplink_priv
;
1816 mapping_remove(uplink_priv
->tunnel_mapping
, tun_id
);
1818 mapping_remove(uplink_priv
->tunnel_enc_opts_mapping
,
1822 u32
mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow
*flow
)
1824 return flow
->tunnel_id
;
1827 void mlx5e_tc_set_ethertype(struct mlx5_core_dev
*mdev
,
1828 struct flow_match_basic
*match
, bool outer
,
1829 void *headers_c
, void *headers_v
)
1831 bool ip_version_cap
;
1833 ip_version_cap
= outer
?
1834 MLX5_CAP_FLOWTABLE_NIC_RX(mdev
,
1835 ft_field_support
.outer_ip_version
) :
1836 MLX5_CAP_FLOWTABLE_NIC_RX(mdev
,
1837 ft_field_support
.inner_ip_version
);
1839 if (ip_version_cap
&& match
->mask
->n_proto
== htons(0xFFFF) &&
1840 (match
->key
->n_proto
== htons(ETH_P_IP
) ||
1841 match
->key
->n_proto
== htons(ETH_P_IPV6
))) {
1842 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4
, headers_c
, ip_version
);
1843 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ip_version
,
1844 match
->key
->n_proto
== htons(ETH_P_IP
) ? 4 : 6);
1846 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ethertype
,
1847 ntohs(match
->mask
->n_proto
));
1848 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ethertype
,
1849 ntohs(match
->key
->n_proto
));
1853 u8
mlx5e_tc_get_ip_version(struct mlx5_flow_spec
*spec
, bool outer
)
1860 headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
, outer_headers
);
1862 headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
, inner_headers
);
1864 ip_version
= MLX5_GET(fte_match_set_lyr_2_4
, headers_v
, ip_version
);
1865 /* Return ip_version converted from ethertype anyway */
1867 ethertype
= MLX5_GET(fte_match_set_lyr_2_4
, headers_v
, ethertype
);
1868 if (ethertype
== ETH_P_IP
|| ethertype
== ETH_P_ARP
)
1870 else if (ethertype
== ETH_P_IPV6
)
/* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
 * And changes inner ip_ecn depending on inner and outer ip_ecn as follows:
 * +---------+----------------------------------------+
 * |Arriving |         Arriving Outer Header          |
 * |   Inner +---------+---------+---------+----------+
 * |  Header | Not-ECT | ECT(0)  | ECT(1)  |   CE     |
 * +---------+---------+---------+---------+----------+
 * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop>   |
 * |  ECT(0) |  ECT(0) | ECT(0)  | ECT(1)  |   CE*    |
 * |  ECT(1) |  ECT(1) | ECT(1)  | ECT(1)* |   CE*    |
 * |    CE   |   CE    |  CE     |  CE     |   CE     |
 * +---------+---------+---------+---------+----------+
 *
 * Tc matches on inner after decapsulation on tunnel device, but hw offload matches
 * the inner ip_ecn value before hardware decap action.
 *
 * Cells marked are changed from original inner packet ip_ecn value during decap, and
 * so matching those values on inner ip_ecn before decap will fail.
 *
 * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn,
 * except for the outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE,
 * and such we can drop the inner ip_ecn=CE match.
 */
1900 static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv
*priv
,
1901 struct flow_cls_offload
*f
,
1902 bool *match_inner_ecn
)
1904 u8 outer_ecn_mask
= 0, outer_ecn_key
= 0, inner_ecn_mask
= 0, inner_ecn_key
= 0;
1905 struct flow_rule
*rule
= flow_cls_offload_flow_rule(f
);
1906 struct netlink_ext_ack
*extack
= f
->common
.extack
;
1907 struct flow_match_ip match
;
1909 *match_inner_ecn
= true;
1911 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_ENC_IP
)) {
1912 flow_rule_match_enc_ip(rule
, &match
);
1913 outer_ecn_key
= match
.key
->tos
& INET_ECN_MASK
;
1914 outer_ecn_mask
= match
.mask
->tos
& INET_ECN_MASK
;
1917 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_IP
)) {
1918 flow_rule_match_ip(rule
, &match
);
1919 inner_ecn_key
= match
.key
->tos
& INET_ECN_MASK
;
1920 inner_ecn_mask
= match
.mask
->tos
& INET_ECN_MASK
;
1923 if (outer_ecn_mask
!= 0 && outer_ecn_mask
!= INET_ECN_MASK
) {
1924 NL_SET_ERR_MSG_MOD(extack
, "Partial match on enc_tos ecn bits isn't supported");
1925 netdev_warn(priv
->netdev
, "Partial match on enc_tos ecn bits isn't supported");
1929 if (!outer_ecn_mask
) {
1930 if (!inner_ecn_mask
)
1933 NL_SET_ERR_MSG_MOD(extack
,
1934 "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
1935 netdev_warn(priv
->netdev
,
1936 "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
1940 if (inner_ecn_mask
&& inner_ecn_mask
!= INET_ECN_MASK
) {
1941 NL_SET_ERR_MSG_MOD(extack
,
1942 "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
1943 netdev_warn(priv
->netdev
,
1944 "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
1948 if (!inner_ecn_mask
)
1951 /* Both inner and outer have full mask on ecn */
1953 if (outer_ecn_key
== INET_ECN_ECT_1
) {
1954 /* inner ecn might change by DECAP action */
1956 NL_SET_ERR_MSG_MOD(extack
, "Match on enc_tos ecn = ECT(1) isn't supported");
1957 netdev_warn(priv
->netdev
, "Match on enc_tos ecn = ECT(1) isn't supported");
1961 if (outer_ecn_key
!= INET_ECN_CE
)
1964 if (inner_ecn_key
!= INET_ECN_CE
) {
1965 /* Can't happen in software, as packet ecn will be changed to CE after decap */
1966 NL_SET_ERR_MSG_MOD(extack
,
1967 "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
1968 netdev_warn(priv
->netdev
,
1969 "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
1973 /* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE in anycase,
1974 * drop match on inner ecn
1976 *match_inner_ecn
= false;
1981 static int parse_tunnel_attr(struct mlx5e_priv
*priv
,
1982 struct mlx5e_tc_flow
*flow
,
1983 struct mlx5_flow_spec
*spec
,
1984 struct flow_cls_offload
*f
,
1985 struct net_device
*filter_dev
,
1989 struct mlx5e_tc_tunnel
*tunnel
= mlx5e_get_tc_tun(filter_dev
);
1990 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1991 struct netlink_ext_ack
*extack
= f
->common
.extack
;
1992 bool needs_mapping
, sets_mapping
;
1995 if (!mlx5e_is_eswitch_flow(flow
))
1998 needs_mapping
= !!flow
->attr
->chain
;
1999 sets_mapping
= flow_requires_tunnel_mapping(flow
->attr
->chain
, f
);
2000 *match_inner
= !needs_mapping
;
2002 if ((needs_mapping
|| sets_mapping
) &&
2003 !mlx5_eswitch_reg_c1_loopback_enabled(esw
)) {
2004 NL_SET_ERR_MSG(extack
,
2005 "Chains on tunnel devices isn't supported without register loopback support");
2006 netdev_warn(priv
->netdev
,
2007 "Chains on tunnel devices isn't supported without register loopback support");
2011 if (!flow
->attr
->chain
) {
2012 err
= mlx5e_tc_tun_parse(filter_dev
, priv
, spec
, f
,
2015 NL_SET_ERR_MSG_MOD(extack
,
2016 "Failed to parse tunnel attributes");
2017 netdev_warn(priv
->netdev
,
2018 "Failed to parse tunnel attributes");
2022 /* With mpls over udp we decapsulate using packet reformat
2025 if (!netif_is_bareudp(filter_dev
))
2026 flow
->attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_DECAP
;
2027 err
= mlx5e_tc_set_attr_rx_tun(flow
, spec
);
2030 } else if (tunnel
&& tunnel
->tunnel_type
== MLX5E_TC_TUNNEL_TYPE_VXLAN
) {
2031 struct mlx5_flow_spec
*tmp_spec
;
2033 tmp_spec
= kvzalloc(sizeof(*tmp_spec
), GFP_KERNEL
);
2035 NL_SET_ERR_MSG_MOD(extack
, "Failed to allocate memory for vxlan tmp spec");
2036 netdev_warn(priv
->netdev
, "Failed to allocate memory for vxlan tmp spec");
2039 memcpy(tmp_spec
, spec
, sizeof(*tmp_spec
));
2041 err
= mlx5e_tc_tun_parse(filter_dev
, priv
, tmp_spec
, f
, match_level
);
2044 NL_SET_ERR_MSG_MOD(extack
, "Failed to parse tunnel attributes");
2045 netdev_warn(priv
->netdev
, "Failed to parse tunnel attributes");
2048 err
= mlx5e_tc_set_attr_rx_tun(flow
, tmp_spec
);
2054 if (!needs_mapping
&& !sets_mapping
)
2057 return mlx5e_get_flow_tunnel_id(priv
, flow
, f
, filter_dev
);
2060 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec
*spec
)
2062 return MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
2066 static void *get_match_inner_headers_value(struct mlx5_flow_spec
*spec
)
2068 return MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
2072 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec
*spec
)
2074 return MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
2078 static void *get_match_outer_headers_value(struct mlx5_flow_spec
*spec
)
2080 return MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
2084 static void *get_match_headers_value(u32 flags
,
2085 struct mlx5_flow_spec
*spec
)
2087 return (flags
& MLX5_FLOW_CONTEXT_ACTION_DECAP
) ?
2088 get_match_inner_headers_value(spec
) :
2089 get_match_outer_headers_value(spec
);
2092 static void *get_match_headers_criteria(u32 flags
,
2093 struct mlx5_flow_spec
*spec
)
2095 return (flags
& MLX5_FLOW_CONTEXT_ACTION_DECAP
) ?
2096 get_match_inner_headers_criteria(spec
) :
2097 get_match_outer_headers_criteria(spec
);
2100 static int mlx5e_flower_parse_meta(struct net_device
*filter_dev
,
2101 struct flow_cls_offload
*f
)
2103 struct flow_rule
*rule
= flow_cls_offload_flow_rule(f
);
2104 struct netlink_ext_ack
*extack
= f
->common
.extack
;
2105 struct net_device
*ingress_dev
;
2106 struct flow_match_meta match
;
2108 if (!flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_META
))
2111 flow_rule_match_meta(rule
, &match
);
2112 if (!match
.mask
->ingress_ifindex
)
2115 if (match
.mask
->ingress_ifindex
!= 0xFFFFFFFF) {
2116 NL_SET_ERR_MSG_MOD(extack
, "Unsupported ingress ifindex mask");
2120 ingress_dev
= __dev_get_by_index(dev_net(filter_dev
),
2121 match
.key
->ingress_ifindex
);
2123 NL_SET_ERR_MSG_MOD(extack
,
2124 "Can't find the ingress port to match on");
2128 if (ingress_dev
!= filter_dev
) {
2129 NL_SET_ERR_MSG_MOD(extack
,
2130 "Can't match on the ingress filter port");
2137 static bool skip_key_basic(struct net_device
*filter_dev
,
2138 struct flow_cls_offload
*f
)
2140 /* When doing mpls over udp decap, the user needs to provide
2141 * MPLS_UC as the protocol in order to be able to match on mpls
2142 * label fields. However, the actual ethertype is IP so we want to
2143 * avoid matching on this, otherwise we'll fail the match.
2145 if (netif_is_bareudp(filter_dev
) && f
->common
.chain_index
== 0)
2151 static int __parse_cls_flower(struct mlx5e_priv
*priv
,
2152 struct mlx5e_tc_flow
*flow
,
2153 struct mlx5_flow_spec
*spec
,
2154 struct flow_cls_offload
*f
,
2155 struct net_device
*filter_dev
,
2156 u8
*inner_match_level
, u8
*outer_match_level
)
2158 struct netlink_ext_ack
*extack
= f
->common
.extack
;
2159 void *headers_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
2161 void *headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
2163 void *misc_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
2165 void *misc_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
2167 void *misc_c_3
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
2169 void *misc_v_3
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
2171 struct flow_rule
*rule
= flow_cls_offload_flow_rule(f
);
2172 struct flow_dissector
*dissector
= rule
->match
.dissector
;
2173 enum fs_flow_table_type fs_type
;
2174 bool match_inner_ecn
= true;
2180 fs_type
= mlx5e_is_eswitch_flow(flow
) ? FS_FT_FDB
: FS_FT_NIC_RX
;
2181 match_level
= outer_match_level
;
2183 if (dissector
->used_keys
&
2184 ~(BIT(FLOW_DISSECTOR_KEY_META
) |
2185 BIT(FLOW_DISSECTOR_KEY_CONTROL
) |
2186 BIT(FLOW_DISSECTOR_KEY_BASIC
) |
2187 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS
) |
2188 BIT(FLOW_DISSECTOR_KEY_VLAN
) |
2189 BIT(FLOW_DISSECTOR_KEY_CVLAN
) |
2190 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS
) |
2191 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS
) |
2192 BIT(FLOW_DISSECTOR_KEY_PORTS
) |
2193 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID
) |
2194 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS
) |
2195 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS
) |
2196 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS
) |
2197 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL
) |
2198 BIT(FLOW_DISSECTOR_KEY_TCP
) |
2199 BIT(FLOW_DISSECTOR_KEY_IP
) |
2200 BIT(FLOW_DISSECTOR_KEY_CT
) |
2201 BIT(FLOW_DISSECTOR_KEY_ENC_IP
) |
2202 BIT(FLOW_DISSECTOR_KEY_ENC_OPTS
) |
2203 BIT(FLOW_DISSECTOR_KEY_ICMP
) |
2204 BIT(FLOW_DISSECTOR_KEY_MPLS
))) {
2205 NL_SET_ERR_MSG_MOD(extack
, "Unsupported key");
2206 netdev_dbg(priv
->netdev
, "Unsupported key used: 0x%x\n",
2207 dissector
->used_keys
);
2211 if (mlx5e_get_tc_tun(filter_dev
)) {
2212 bool match_inner
= false;
2214 err
= parse_tunnel_attr(priv
, flow
, spec
, f
, filter_dev
,
2215 outer_match_level
, &match_inner
);
2220 /* header pointers should point to the inner headers
2221 * if the packet was decapsulated already.
2222 * outer headers are set by parse_tunnel_attr.
2224 match_level
= inner_match_level
;
2225 headers_c
= get_match_inner_headers_criteria(spec
);
2226 headers_v
= get_match_inner_headers_value(spec
);
2229 err
= mlx5e_tc_verify_tunnel_ecn(priv
, f
, &match_inner_ecn
);
2234 err
= mlx5e_flower_parse_meta(filter_dev
, f
);
2238 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_BASIC
) &&
2239 !skip_key_basic(filter_dev
, f
)) {
2240 struct flow_match_basic match
;
2242 flow_rule_match_basic(rule
, &match
);
2243 mlx5e_tc_set_ethertype(priv
->mdev
, &match
,
2244 match_level
== outer_match_level
,
2245 headers_c
, headers_v
);
2247 if (match
.mask
->n_proto
)
2248 *match_level
= MLX5_MATCH_L2
;
2250 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_VLAN
) ||
2251 is_vlan_dev(filter_dev
)) {
2252 struct flow_dissector_key_vlan filter_dev_mask
;
2253 struct flow_dissector_key_vlan filter_dev_key
;
2254 struct flow_match_vlan match
;
2256 if (is_vlan_dev(filter_dev
)) {
2257 match
.key
= &filter_dev_key
;
2258 match
.key
->vlan_id
= vlan_dev_vlan_id(filter_dev
);
2259 match
.key
->vlan_tpid
= vlan_dev_vlan_proto(filter_dev
);
2260 match
.key
->vlan_priority
= 0;
2261 match
.mask
= &filter_dev_mask
;
2262 memset(match
.mask
, 0xff, sizeof(*match
.mask
));
2263 match
.mask
->vlan_priority
= 0;
2265 flow_rule_match_vlan(rule
, &match
);
2267 if (match
.mask
->vlan_id
||
2268 match
.mask
->vlan_priority
||
2269 match
.mask
->vlan_tpid
) {
2270 if (match
.key
->vlan_tpid
== htons(ETH_P_8021AD
)) {
2271 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
2273 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
2276 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
2278 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
2282 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, first_vid
,
2283 match
.mask
->vlan_id
);
2284 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, first_vid
,
2285 match
.key
->vlan_id
);
2287 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, first_prio
,
2288 match
.mask
->vlan_priority
);
2289 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, first_prio
,
2290 match
.key
->vlan_priority
);
2292 *match_level
= MLX5_MATCH_L2
;
2294 if (!flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_CVLAN
) &&
2295 match
.mask
->vlan_eth_type
&&
2296 MLX5_CAP_FLOWTABLE_TYPE(priv
->mdev
,
2297 ft_field_support
.outer_second_vid
,
2299 MLX5_SET(fte_match_set_misc
, misc_c
,
2300 outer_second_cvlan_tag
, 1);
2301 spec
->match_criteria_enable
|=
2302 MLX5_MATCH_MISC_PARAMETERS
;
2305 } else if (*match_level
!= MLX5_MATCH_NONE
) {
2306 /* cvlan_tag enabled in match criteria and
2307 * disabled in match value means both S & C tags
2308 * don't exist (untagged of both)
2310 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, cvlan_tag
, 1);
2311 *match_level
= MLX5_MATCH_L2
;
2314 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_CVLAN
)) {
2315 struct flow_match_vlan match
;
2317 flow_rule_match_cvlan(rule
, &match
);
2318 if (match
.mask
->vlan_id
||
2319 match
.mask
->vlan_priority
||
2320 match
.mask
->vlan_tpid
) {
2321 if (!MLX5_CAP_FLOWTABLE_TYPE(priv
->mdev
, ft_field_support
.outer_second_vid
,
2323 NL_SET_ERR_MSG_MOD(extack
,
2324 "Matching on CVLAN is not supported");
2328 if (match
.key
->vlan_tpid
== htons(ETH_P_8021AD
)) {
2329 MLX5_SET(fte_match_set_misc
, misc_c
,
2330 outer_second_svlan_tag
, 1);
2331 MLX5_SET(fte_match_set_misc
, misc_v
,
2332 outer_second_svlan_tag
, 1);
2334 MLX5_SET(fte_match_set_misc
, misc_c
,
2335 outer_second_cvlan_tag
, 1);
2336 MLX5_SET(fte_match_set_misc
, misc_v
,
2337 outer_second_cvlan_tag
, 1);
2340 MLX5_SET(fte_match_set_misc
, misc_c
, outer_second_vid
,
2341 match
.mask
->vlan_id
);
2342 MLX5_SET(fte_match_set_misc
, misc_v
, outer_second_vid
,
2343 match
.key
->vlan_id
);
2344 MLX5_SET(fte_match_set_misc
, misc_c
, outer_second_prio
,
2345 match
.mask
->vlan_priority
);
2346 MLX5_SET(fte_match_set_misc
, misc_v
, outer_second_prio
,
2347 match
.key
->vlan_priority
);
2349 *match_level
= MLX5_MATCH_L2
;
2350 spec
->match_criteria_enable
|= MLX5_MATCH_MISC_PARAMETERS
;
2354 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_ETH_ADDRS
)) {
2355 struct flow_match_eth_addrs match
;
2357 flow_rule_match_eth_addrs(rule
, &match
);
2358 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
2361 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
2365 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
2368 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
2372 if (!is_zero_ether_addr(match
.mask
->src
) ||
2373 !is_zero_ether_addr(match
.mask
->dst
))
2374 *match_level
= MLX5_MATCH_L2
;
2377 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_CONTROL
)) {
2378 struct flow_match_control match
;
2380 flow_rule_match_control(rule
, &match
);
2381 addr_type
= match
.key
->addr_type
;
2383 /* the HW doesn't support frag first/later */
2384 if (match
.mask
->flags
& FLOW_DIS_FIRST_FRAG
)
2387 if (match
.mask
->flags
& FLOW_DIS_IS_FRAGMENT
) {
2388 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, frag
, 1);
2389 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, frag
,
2390 match
.key
->flags
& FLOW_DIS_IS_FRAGMENT
);
2392 /* the HW doesn't need L3 inline to match on frag=no */
2393 if (!(match
.key
->flags
& FLOW_DIS_IS_FRAGMENT
))
2394 *match_level
= MLX5_MATCH_L2
;
2395 /* *** L2 attributes parsing up to here *** */
2397 *match_level
= MLX5_MATCH_L3
;
2401 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_BASIC
)) {
2402 struct flow_match_basic match
;
2404 flow_rule_match_basic(rule
, &match
);
2405 ip_proto
= match
.key
->ip_proto
;
2407 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ip_protocol
,
2408 match
.mask
->ip_proto
);
2409 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ip_protocol
,
2410 match
.key
->ip_proto
);
2412 if (match
.mask
->ip_proto
)
2413 *match_level
= MLX5_MATCH_L3
;
2416 if (addr_type
== FLOW_DISSECTOR_KEY_IPV4_ADDRS
) {
2417 struct flow_match_ipv4_addrs match
;
2419 flow_rule_match_ipv4_addrs(rule
, &match
);
2420 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
2421 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
),
2422 &match
.mask
->src
, sizeof(match
.mask
->src
));
2423 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
2424 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
),
2425 &match
.key
->src
, sizeof(match
.key
->src
));
2426 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
2427 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
),
2428 &match
.mask
->dst
, sizeof(match
.mask
->dst
));
2429 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
2430 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
),
2431 &match
.key
->dst
, sizeof(match
.key
->dst
));
2433 if (match
.mask
->src
|| match
.mask
->dst
)
2434 *match_level
= MLX5_MATCH_L3
;
2437 if (addr_type
== FLOW_DISSECTOR_KEY_IPV6_ADDRS
) {
2438 struct flow_match_ipv6_addrs match
;
2440 flow_rule_match_ipv6_addrs(rule
, &match
);
2441 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
2442 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
),
2443 &match
.mask
->src
, sizeof(match
.mask
->src
));
2444 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
2445 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
),
2446 &match
.key
->src
, sizeof(match
.key
->src
));
2448 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
2449 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
),
2450 &match
.mask
->dst
, sizeof(match
.mask
->dst
));
2451 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
2452 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
),
2453 &match
.key
->dst
, sizeof(match
.key
->dst
));
2455 if (ipv6_addr_type(&match
.mask
->src
) != IPV6_ADDR_ANY
||
2456 ipv6_addr_type(&match
.mask
->dst
) != IPV6_ADDR_ANY
)
2457 *match_level
= MLX5_MATCH_L3
;
2460 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_IP
)) {
2461 struct flow_match_ip match
;
2463 flow_rule_match_ip(rule
, &match
);
2464 if (match_inner_ecn
) {
2465 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ip_ecn
,
2466 match
.mask
->tos
& 0x3);
2467 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ip_ecn
,
2468 match
.key
->tos
& 0x3);
2471 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ip_dscp
,
2472 match
.mask
->tos
>> 2);
2473 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ip_dscp
,
2474 match
.key
->tos
>> 2);
2476 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ttl_hoplimit
,
2478 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ttl_hoplimit
,
2481 if (match
.mask
->ttl
&&
2482 !MLX5_CAP_ESW_FLOWTABLE_FDB(priv
->mdev
,
2483 ft_field_support
.outer_ipv4_ttl
)) {
2484 NL_SET_ERR_MSG_MOD(extack
,
2485 "Matching on TTL is not supported");
2489 if (match
.mask
->tos
|| match
.mask
->ttl
)
2490 *match_level
= MLX5_MATCH_L3
;
2493 /* *** L3 attributes parsing up to here *** */
2495 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_PORTS
)) {
2496 struct flow_match_ports match
;
2498 flow_rule_match_ports(rule
, &match
);
2501 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
2502 tcp_sport
, ntohs(match
.mask
->src
));
2503 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
2504 tcp_sport
, ntohs(match
.key
->src
));
2506 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
2507 tcp_dport
, ntohs(match
.mask
->dst
));
2508 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
2509 tcp_dport
, ntohs(match
.key
->dst
));
2513 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
2514 udp_sport
, ntohs(match
.mask
->src
));
2515 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
2516 udp_sport
, ntohs(match
.key
->src
));
2518 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
2519 udp_dport
, ntohs(match
.mask
->dst
));
2520 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
2521 udp_dport
, ntohs(match
.key
->dst
));
2524 NL_SET_ERR_MSG_MOD(extack
,
2525 "Only UDP and TCP transports are supported for L4 matching");
2526 netdev_err(priv
->netdev
,
2527 "Only UDP and TCP transport are supported\n");
2531 if (match
.mask
->src
|| match
.mask
->dst
)
2532 *match_level
= MLX5_MATCH_L4
;
2535 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_TCP
)) {
2536 struct flow_match_tcp match
;
2538 flow_rule_match_tcp(rule
, &match
);
2539 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, tcp_flags
,
2540 ntohs(match
.mask
->flags
));
2541 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, tcp_flags
,
2542 ntohs(match
.key
->flags
));
2544 if (match
.mask
->flags
)
2545 *match_level
= MLX5_MATCH_L4
;
2547 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_ICMP
)) {
2548 struct flow_match_icmp match
;
2550 flow_rule_match_icmp(rule
, &match
);
2553 if (!(MLX5_CAP_GEN(priv
->mdev
, flex_parser_protocols
) &
2554 MLX5_FLEX_PROTO_ICMP
))
2556 MLX5_SET(fte_match_set_misc3
, misc_c_3
, icmp_type
,
2558 MLX5_SET(fte_match_set_misc3
, misc_v_3
, icmp_type
,
2560 MLX5_SET(fte_match_set_misc3
, misc_c_3
, icmp_code
,
2562 MLX5_SET(fte_match_set_misc3
, misc_v_3
, icmp_code
,
2565 case IPPROTO_ICMPV6
:
2566 if (!(MLX5_CAP_GEN(priv
->mdev
, flex_parser_protocols
) &
2567 MLX5_FLEX_PROTO_ICMPV6
))
2569 MLX5_SET(fte_match_set_misc3
, misc_c_3
, icmpv6_type
,
2571 MLX5_SET(fte_match_set_misc3
, misc_v_3
, icmpv6_type
,
2573 MLX5_SET(fte_match_set_misc3
, misc_c_3
, icmpv6_code
,
2575 MLX5_SET(fte_match_set_misc3
, misc_v_3
, icmpv6_code
,
2579 NL_SET_ERR_MSG_MOD(extack
,
2580 "Code and type matching only with ICMP and ICMPv6");
2581 netdev_err(priv
->netdev
,
2582 "Code and type matching only with ICMP and ICMPv6\n");
2585 if (match
.mask
->code
|| match
.mask
->type
) {
2586 *match_level
= MLX5_MATCH_L4
;
2587 spec
->match_criteria_enable
|= MLX5_MATCH_MISC_PARAMETERS_3
;
2590 /* Currently supported only for MPLS over UDP */
2591 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_MPLS
) &&
2592 !netif_is_bareudp(filter_dev
)) {
2593 NL_SET_ERR_MSG_MOD(extack
,
2594 "Matching on MPLS is supported only for MPLS over UDP");
2595 netdev_err(priv
->netdev
,
2596 "Matching on MPLS is supported only for MPLS over UDP\n");
2603 static int parse_cls_flower(struct mlx5e_priv
*priv
,
2604 struct mlx5e_tc_flow
*flow
,
2605 struct mlx5_flow_spec
*spec
,
2606 struct flow_cls_offload
*f
,
2607 struct net_device
*filter_dev
)
2609 u8 inner_match_level
, outer_match_level
, non_tunnel_match_level
;
2610 struct netlink_ext_ack
*extack
= f
->common
.extack
;
2611 struct mlx5_core_dev
*dev
= priv
->mdev
;
2612 struct mlx5_eswitch
*esw
= dev
->priv
.eswitch
;
2613 struct mlx5e_rep_priv
*rpriv
= priv
->ppriv
;
2614 struct mlx5_eswitch_rep
*rep
;
2615 bool is_eswitch_flow
;
2618 inner_match_level
= MLX5_MATCH_NONE
;
2619 outer_match_level
= MLX5_MATCH_NONE
;
2621 err
= __parse_cls_flower(priv
, flow
, spec
, f
, filter_dev
,
2622 &inner_match_level
, &outer_match_level
);
2623 non_tunnel_match_level
= (inner_match_level
== MLX5_MATCH_NONE
) ?
2624 outer_match_level
: inner_match_level
;
2626 is_eswitch_flow
= mlx5e_is_eswitch_flow(flow
);
2627 if (!err
&& is_eswitch_flow
) {
2629 if (rep
->vport
!= MLX5_VPORT_UPLINK
&&
2630 (esw
->offloads
.inline_mode
!= MLX5_INLINE_MODE_NONE
&&
2631 esw
->offloads
.inline_mode
< non_tunnel_match_level
)) {
2632 NL_SET_ERR_MSG_MOD(extack
,
2633 "Flow is not offloaded due to min inline setting");
2634 netdev_warn(priv
->netdev
,
2635 "Flow is not offloaded due to min inline setting, required %d actual %d\n",
2636 non_tunnel_match_level
, esw
->offloads
.inline_mode
);
2641 flow
->attr
->inner_match_level
= inner_match_level
;
2642 flow
->attr
->outer_match_level
= outer_match_level
;
2648 struct pedit_headers
{
2650 struct vlan_hdr vlan
;
2657 struct pedit_headers_action
{
2658 struct pedit_headers vals
;
2659 struct pedit_headers masks
;
2663 static int pedit_header_offsets
[] = {
2664 [FLOW_ACT_MANGLE_HDR_TYPE_ETH
] = offsetof(struct pedit_headers
, eth
),
2665 [FLOW_ACT_MANGLE_HDR_TYPE_IP4
] = offsetof(struct pedit_headers
, ip4
),
2666 [FLOW_ACT_MANGLE_HDR_TYPE_IP6
] = offsetof(struct pedit_headers
, ip6
),
2667 [FLOW_ACT_MANGLE_HDR_TYPE_TCP
] = offsetof(struct pedit_headers
, tcp
),
2668 [FLOW_ACT_MANGLE_HDR_TYPE_UDP
] = offsetof(struct pedit_headers
, udp
),
2671 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
2673 static int set_pedit_val(u8 hdr_type
, u32 mask
, u32 val
, u32 offset
,
2674 struct pedit_headers_action
*hdrs
)
2676 u32
*curr_pmask
, *curr_pval
;
2678 curr_pmask
= (u32
*)(pedit_header(&hdrs
->masks
, hdr_type
) + offset
);
2679 curr_pval
= (u32
*)(pedit_header(&hdrs
->vals
, hdr_type
) + offset
);
2681 if (*curr_pmask
& mask
) /* disallow acting twice on the same location */
2684 *curr_pmask
|= mask
;
2685 *curr_pval
|= (val
& mask
);
2693 struct mlx5_fields
{
2701 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
2702 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
2703 offsetof(struct pedit_headers, field) + (off), \
2704 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
2706 /* masked values are the same and there are no rewrites that do not have a
2709 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2710 type matchmaskx = *(type *)(matchmaskp); \
2711 type matchvalx = *(type *)(matchvalp); \
2712 type maskx = *(type *)(maskp); \
2713 type valx = *(type *)(valp); \
2715 (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2719 static bool cmp_val_mask(void *valp
, void *maskp
, void *matchvalp
,
2720 void *matchmaskp
, u8 bsize
)
2726 same
= SAME_VAL_MASK(u8
, valp
, maskp
, matchvalp
, matchmaskp
);
2729 same
= SAME_VAL_MASK(u16
, valp
, maskp
, matchvalp
, matchmaskp
);
2732 same
= SAME_VAL_MASK(u32
, valp
, maskp
, matchvalp
, matchmaskp
);
2739 static struct mlx5_fields fields
[] = {
2740 OFFLOAD(DMAC_47_16
, 32, U32_MAX
, eth
.h_dest
[0], 0, dmac_47_16
),
2741 OFFLOAD(DMAC_15_0
, 16, U16_MAX
, eth
.h_dest
[4], 0, dmac_15_0
),
2742 OFFLOAD(SMAC_47_16
, 32, U32_MAX
, eth
.h_source
[0], 0, smac_47_16
),
2743 OFFLOAD(SMAC_15_0
, 16, U16_MAX
, eth
.h_source
[4], 0, smac_15_0
),
2744 OFFLOAD(ETHERTYPE
, 16, U16_MAX
, eth
.h_proto
, 0, ethertype
),
2745 OFFLOAD(FIRST_VID
, 16, U16_MAX
, vlan
.h_vlan_TCI
, 0, first_vid
),
2747 OFFLOAD(IP_DSCP
, 8, 0xfc, ip4
.tos
, 0, ip_dscp
),
2748 OFFLOAD(IP_TTL
, 8, U8_MAX
, ip4
.ttl
, 0, ttl_hoplimit
),
2749 OFFLOAD(SIPV4
, 32, U32_MAX
, ip4
.saddr
, 0, src_ipv4_src_ipv6
.ipv4_layout
.ipv4
),
2750 OFFLOAD(DIPV4
, 32, U32_MAX
, ip4
.daddr
, 0, dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
),
2752 OFFLOAD(SIPV6_127_96
, 32, U32_MAX
, ip6
.saddr
.s6_addr32
[0], 0,
2753 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
[0]),
2754 OFFLOAD(SIPV6_95_64
, 32, U32_MAX
, ip6
.saddr
.s6_addr32
[1], 0,
2755 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
[4]),
2756 OFFLOAD(SIPV6_63_32
, 32, U32_MAX
, ip6
.saddr
.s6_addr32
[2], 0,
2757 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
[8]),
2758 OFFLOAD(SIPV6_31_0
, 32, U32_MAX
, ip6
.saddr
.s6_addr32
[3], 0,
2759 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
[12]),
2760 OFFLOAD(DIPV6_127_96
, 32, U32_MAX
, ip6
.daddr
.s6_addr32
[0], 0,
2761 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
[0]),
2762 OFFLOAD(DIPV6_95_64
, 32, U32_MAX
, ip6
.daddr
.s6_addr32
[1], 0,
2763 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
[4]),
2764 OFFLOAD(DIPV6_63_32
, 32, U32_MAX
, ip6
.daddr
.s6_addr32
[2], 0,
2765 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
[8]),
2766 OFFLOAD(DIPV6_31_0
, 32, U32_MAX
, ip6
.daddr
.s6_addr32
[3], 0,
2767 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
[12]),
2768 OFFLOAD(IPV6_HOPLIMIT
, 8, U8_MAX
, ip6
.hop_limit
, 0, ttl_hoplimit
),
2769 OFFLOAD(IP_DSCP
, 16, 0xc00f, ip6
, 0, ip_dscp
),
2771 OFFLOAD(TCP_SPORT
, 16, U16_MAX
, tcp
.source
, 0, tcp_sport
),
2772 OFFLOAD(TCP_DPORT
, 16, U16_MAX
, tcp
.dest
, 0, tcp_dport
),
2773 /* in linux iphdr tcp_flags is 8 bits long */
2774 OFFLOAD(TCP_FLAGS
, 8, U8_MAX
, tcp
.ack_seq
, 5, tcp_flags
),
2776 OFFLOAD(UDP_SPORT
, 16, U16_MAX
, udp
.source
, 0, udp_sport
),
2777 OFFLOAD(UDP_DPORT
, 16, U16_MAX
, udp
.dest
, 0, udp_dport
),
2780 static unsigned long mask_to_le(unsigned long mask
, int size
)
2786 mask_be32
= (__force __be32
)(mask
);
2787 mask
= (__force
unsigned long)cpu_to_le32(be32_to_cpu(mask_be32
));
2788 } else if (size
== 16) {
2789 mask_be32
= (__force __be32
)(mask
);
2790 mask_be16
= *(__be16
*)&mask_be32
;
2791 mask
= (__force
unsigned long)cpu_to_le16(be16_to_cpu(mask_be16
));
2796 static int offload_pedit_fields(struct mlx5e_priv
*priv
,
2798 struct pedit_headers_action
*hdrs
,
2799 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
2801 struct netlink_ext_ack
*extack
)
2803 struct pedit_headers
*set_masks
, *add_masks
, *set_vals
, *add_vals
;
2804 int i
, action_size
, first
, last
, next_z
;
2805 void *headers_c
, *headers_v
, *action
, *vals_p
;
2806 u32
*s_masks_p
, *a_masks_p
, s_mask
, a_mask
;
2807 struct mlx5e_tc_mod_hdr_acts
*mod_acts
;
2808 struct mlx5_fields
*f
;
2809 unsigned long mask
, field_mask
;
2813 mod_acts
= &parse_attr
->mod_hdr_acts
;
2814 headers_c
= get_match_headers_criteria(*action_flags
, &parse_attr
->spec
);
2815 headers_v
= get_match_headers_value(*action_flags
, &parse_attr
->spec
);
2817 set_masks
= &hdrs
[0].masks
;
2818 add_masks
= &hdrs
[1].masks
;
2819 set_vals
= &hdrs
[0].vals
;
2820 add_vals
= &hdrs
[1].vals
;
2822 action_size
= MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto
);
2824 for (i
= 0; i
< ARRAY_SIZE(fields
); i
++) {
2828 /* avoid seeing bits set from previous iterations */
2832 s_masks_p
= (void *)set_masks
+ f
->offset
;
2833 a_masks_p
= (void *)add_masks
+ f
->offset
;
2835 s_mask
= *s_masks_p
& f
->field_mask
;
2836 a_mask
= *a_masks_p
& f
->field_mask
;
2838 if (!s_mask
&& !a_mask
) /* nothing to offload here */
2841 if (s_mask
&& a_mask
) {
2842 NL_SET_ERR_MSG_MOD(extack
,
2843 "can't set and add to the same HW field");
2844 netdev_warn(priv
->netdev
,
2845 "mlx5: can't set and add to the same HW field (%x)\n",
2852 void *match_mask
= headers_c
+ f
->match_offset
;
2853 void *match_val
= headers_v
+ f
->match_offset
;
2855 cmd
= MLX5_ACTION_TYPE_SET
;
2857 vals_p
= (void *)set_vals
+ f
->offset
;
2858 /* don't rewrite if we have a match on the same value */
2859 if (cmp_val_mask(vals_p
, s_masks_p
, match_val
,
2860 match_mask
, f
->field_bsize
))
2862 /* clear to denote we consumed this field */
2863 *s_masks_p
&= ~f
->field_mask
;
2865 cmd
= MLX5_ACTION_TYPE_ADD
;
2867 vals_p
= (void *)add_vals
+ f
->offset
;
2868 /* add 0 is no change */
2869 if ((*(u32
*)vals_p
& f
->field_mask
) == 0)
2871 /* clear to denote we consumed this field */
2872 *a_masks_p
&= ~f
->field_mask
;
2877 mask
= mask_to_le(mask
, f
->field_bsize
);
2879 first
= find_first_bit(&mask
, f
->field_bsize
);
2880 next_z
= find_next_zero_bit(&mask
, f
->field_bsize
, first
);
2881 last
= find_last_bit(&mask
, f
->field_bsize
);
2882 if (first
< next_z
&& next_z
< last
) {
2883 NL_SET_ERR_MSG_MOD(extack
,
2884 "rewrite of few sub-fields isn't supported");
2885 netdev_warn(priv
->netdev
,
2886 "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
2891 err
= alloc_mod_hdr_actions(priv
->mdev
, namespace, mod_acts
);
2893 NL_SET_ERR_MSG_MOD(extack
,
2894 "too many pedit actions, can't offload");
2895 mlx5_core_warn(priv
->mdev
,
2896 "mlx5: parsed %d pedit actions, can't do more\n",
2897 mod_acts
->num_actions
);
2901 action
= mod_acts
->actions
+
2902 (mod_acts
->num_actions
* action_size
);
2903 MLX5_SET(set_action_in
, action
, action_type
, cmd
);
2904 MLX5_SET(set_action_in
, action
, field
, f
->field
);
2906 if (cmd
== MLX5_ACTION_TYPE_SET
) {
2909 field_mask
= mask_to_le(f
->field_mask
, f
->field_bsize
);
2911 /* if field is bit sized it can start not from first bit */
2912 start
= find_first_bit(&field_mask
, f
->field_bsize
);
2914 MLX5_SET(set_action_in
, action
, offset
, first
- start
);
2915 /* length is num of bits to be written, zero means length of 32 */
2916 MLX5_SET(set_action_in
, action
, length
, (last
- first
+ 1));
2919 if (f
->field_bsize
== 32)
2920 MLX5_SET(set_action_in
, action
, data
, ntohl(*(__be32
*)vals_p
) >> first
);
2921 else if (f
->field_bsize
== 16)
2922 MLX5_SET(set_action_in
, action
, data
, ntohs(*(__be16
*)vals_p
) >> first
);
2923 else if (f
->field_bsize
== 8)
2924 MLX5_SET(set_action_in
, action
, data
, *(u8
*)vals_p
>> first
);
2926 ++mod_acts
->num_actions
;
2932 static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev
*mdev
,
2935 if (namespace == MLX5_FLOW_NAMESPACE_FDB
) /* FDB offloading */
2936 return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev
, max_modify_header_actions
);
2937 else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
2938 return MLX5_CAP_FLOWTABLE_NIC_RX(mdev
, max_modify_header_actions
);
2941 int alloc_mod_hdr_actions(struct mlx5_core_dev
*mdev
,
2943 struct mlx5e_tc_mod_hdr_acts
*mod_hdr_acts
)
2945 int action_size
, new_num_actions
, max_hw_actions
;
2946 size_t new_sz
, old_sz
;
2949 if (mod_hdr_acts
->num_actions
< mod_hdr_acts
->max_actions
)
2952 action_size
= MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto
);
2954 max_hw_actions
= mlx5e_flow_namespace_max_modify_action(mdev
,
2956 new_num_actions
= min(max_hw_actions
,
2957 mod_hdr_acts
->actions
?
2958 mod_hdr_acts
->max_actions
* 2 : 1);
2959 if (mod_hdr_acts
->max_actions
== new_num_actions
)
2962 new_sz
= action_size
* new_num_actions
;
2963 old_sz
= mod_hdr_acts
->max_actions
* action_size
;
2964 ret
= krealloc(mod_hdr_acts
->actions
, new_sz
, GFP_KERNEL
);
2968 memset(ret
+ old_sz
, 0, new_sz
- old_sz
);
2969 mod_hdr_acts
->actions
= ret
;
2970 mod_hdr_acts
->max_actions
= new_num_actions
;
2975 void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts
*mod_hdr_acts
)
2977 kfree(mod_hdr_acts
->actions
);
2978 mod_hdr_acts
->actions
= NULL
;
2979 mod_hdr_acts
->num_actions
= 0;
2980 mod_hdr_acts
->max_actions
= 0;
2983 static const struct pedit_headers zero_masks
= {};
2986 parse_pedit_to_modify_hdr(struct mlx5e_priv
*priv
,
2987 const struct flow_action_entry
*act
, int namespace,
2988 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
2989 struct pedit_headers_action
*hdrs
,
2990 struct netlink_ext_ack
*extack
)
2992 u8 cmd
= (act
->id
== FLOW_ACTION_MANGLE
) ? 0 : 1;
2993 int err
= -EOPNOTSUPP
;
2994 u32 mask
, val
, offset
;
2997 htype
= act
->mangle
.htype
;
2998 err
= -EOPNOTSUPP
; /* can't be all optimistic */
3000 if (htype
== FLOW_ACT_MANGLE_UNSPEC
) {
3001 NL_SET_ERR_MSG_MOD(extack
, "legacy pedit isn't offloaded");
3005 if (!mlx5e_flow_namespace_max_modify_action(priv
->mdev
, namespace)) {
3006 NL_SET_ERR_MSG_MOD(extack
,
3007 "The pedit offload action is not supported");
3011 mask
= act
->mangle
.mask
;
3012 val
= act
->mangle
.val
;
3013 offset
= act
->mangle
.offset
;
3015 err
= set_pedit_val(htype
, ~mask
, val
, offset
, &hdrs
[cmd
]);
3027 parse_pedit_to_reformat(struct mlx5e_priv
*priv
,
3028 const struct flow_action_entry
*act
,
3029 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
3030 struct netlink_ext_ack
*extack
)
3032 u32 mask
, val
, offset
;
3035 if (act
->id
!= FLOW_ACTION_MANGLE
)
3038 if (act
->mangle
.htype
!= FLOW_ACT_MANGLE_HDR_TYPE_ETH
) {
3039 NL_SET_ERR_MSG_MOD(extack
, "Only Ethernet modification is supported");
3043 mask
= ~act
->mangle
.mask
;
3044 val
= act
->mangle
.val
;
3045 offset
= act
->mangle
.offset
;
3046 p
= (u32
*)&parse_attr
->eth
;
3047 *(p
+ (offset
>> 2)) |= (val
& mask
);
3052 static int parse_tc_pedit_action(struct mlx5e_priv
*priv
,
3053 const struct flow_action_entry
*act
, int namespace,
3054 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
3055 struct pedit_headers_action
*hdrs
,
3056 struct mlx5e_tc_flow
*flow
,
3057 struct netlink_ext_ack
*extack
)
3059 if (flow
&& flow_flag_test(flow
, L3_TO_L2_DECAP
))
3060 return parse_pedit_to_reformat(priv
, act
, parse_attr
, extack
);
3062 return parse_pedit_to_modify_hdr(priv
, act
, namespace,
3063 parse_attr
, hdrs
, extack
);
3066 static int alloc_tc_pedit_action(struct mlx5e_priv
*priv
, int namespace,
3067 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
3068 struct pedit_headers_action
*hdrs
,
3070 struct netlink_ext_ack
*extack
)
3072 struct pedit_headers
*cmd_masks
;
3076 err
= offload_pedit_fields(priv
, namespace, hdrs
, parse_attr
,
3077 action_flags
, extack
);
3079 goto out_dealloc_parsed_actions
;
3081 for (cmd
= 0; cmd
< __PEDIT_CMD_MAX
; cmd
++) {
3082 cmd_masks
= &hdrs
[cmd
].masks
;
3083 if (memcmp(cmd_masks
, &zero_masks
, sizeof(zero_masks
))) {
3084 NL_SET_ERR_MSG_MOD(extack
,
3085 "attempt to offload an unsupported field");
3086 netdev_warn(priv
->netdev
, "attempt to offload an unsupported field (cmd %d)\n", cmd
);
3087 print_hex_dump(KERN_WARNING
, "mask: ", DUMP_PREFIX_ADDRESS
,
3088 16, 1, cmd_masks
, sizeof(zero_masks
), true);
3090 goto out_dealloc_parsed_actions
;
3096 out_dealloc_parsed_actions
:
3097 dealloc_mod_hdr_actions(&parse_attr
->mod_hdr_acts
);
3101 static bool csum_offload_supported(struct mlx5e_priv
*priv
,
3104 struct netlink_ext_ack
*extack
)
3106 u32 prot_flags
= TCA_CSUM_UPDATE_FLAG_IPV4HDR
| TCA_CSUM_UPDATE_FLAG_TCP
|
3107 TCA_CSUM_UPDATE_FLAG_UDP
;
3109 /* The HW recalcs checksums only if re-writing headers */
3110 if (!(action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)) {
3111 NL_SET_ERR_MSG_MOD(extack
,
3112 "TC csum action is only offloaded with pedit");
3113 netdev_warn(priv
->netdev
,
3114 "TC csum action is only offloaded with pedit\n");
3118 if (update_flags
& ~prot_flags
) {
3119 NL_SET_ERR_MSG_MOD(extack
,
3120 "can't offload TC csum action for some header/s");
3121 netdev_warn(priv
->netdev
,
3122 "can't offload TC csum action for some header/s - flags %#x\n",
3130 struct ip_ttl_word
{
3136 struct ipv6_hoplimit_word
{
3142 static int is_action_keys_supported(const struct flow_action_entry
*act
,
3143 bool ct_flow
, bool *modify_ip_header
,
3145 struct netlink_ext_ack
*extack
)
3150 htype
= act
->mangle
.htype
;
3151 offset
= act
->mangle
.offset
;
3152 mask
= ~act
->mangle
.mask
;
3153 /* For IPv4 & IPv6 header check 4 byte word,
3154 * to determine that modified fields
3155 * are NOT ttl & hop_limit only.
3157 if (htype
== FLOW_ACT_MANGLE_HDR_TYPE_IP4
) {
3158 struct ip_ttl_word
*ttl_word
=
3159 (struct ip_ttl_word
*)&mask
;
3161 if (offset
!= offsetof(struct iphdr
, ttl
) ||
3162 ttl_word
->protocol
||
3164 *modify_ip_header
= true;
3167 if (offset
>= offsetof(struct iphdr
, saddr
))
3168 *modify_tuple
= true;
3170 if (ct_flow
&& *modify_tuple
) {
3171 NL_SET_ERR_MSG_MOD(extack
,
3172 "can't offload re-write of ipv4 address with action ct");
3175 } else if (htype
== FLOW_ACT_MANGLE_HDR_TYPE_IP6
) {
3176 struct ipv6_hoplimit_word
*hoplimit_word
=
3177 (struct ipv6_hoplimit_word
*)&mask
;
3179 if (offset
!= offsetof(struct ipv6hdr
, payload_len
) ||
3180 hoplimit_word
->payload_len
||
3181 hoplimit_word
->nexthdr
) {
3182 *modify_ip_header
= true;
3185 if (ct_flow
&& offset
>= offsetof(struct ipv6hdr
, saddr
))
3186 *modify_tuple
= true;
3188 if (ct_flow
&& *modify_tuple
) {
3189 NL_SET_ERR_MSG_MOD(extack
,
3190 "can't offload re-write of ipv6 address with action ct");
3193 } else if (htype
== FLOW_ACT_MANGLE_HDR_TYPE_TCP
||
3194 htype
== FLOW_ACT_MANGLE_HDR_TYPE_UDP
) {
3195 *modify_tuple
= true;
3197 NL_SET_ERR_MSG_MOD(extack
,
3198 "can't offload re-write of transport header ports with action ct");
3206 static bool modify_tuple_supported(bool modify_tuple
, bool ct_clear
,
3207 bool ct_flow
, struct netlink_ext_ack
*extack
,
3208 struct mlx5e_priv
*priv
,
3209 struct mlx5_flow_spec
*spec
)
3211 if (!modify_tuple
|| ct_clear
)
3215 NL_SET_ERR_MSG_MOD(extack
,
3216 "can't offload tuple modification with non-clear ct()");
3217 netdev_info(priv
->netdev
,
3218 "can't offload tuple modification with non-clear ct()");
3222 /* Add ct_state=-trk match so it will be offloaded for non ct flows
3223 * (or after clear action), as otherwise, since the tuple is changed,
3224 * we can't restore ct state
3226 if (mlx5_tc_ct_add_no_trk_match(spec
)) {
3227 NL_SET_ERR_MSG_MOD(extack
,
3228 "can't offload tuple modification with ct matches and no ct(clear) action");
3229 netdev_info(priv
->netdev
,
3230 "can't offload tuple modification with ct matches and no ct(clear) action");
3237 static bool modify_header_match_supported(struct mlx5e_priv
*priv
,
3238 struct mlx5_flow_spec
*spec
,
3239 struct flow_action
*flow_action
,
3240 u32 actions
, bool ct_flow
,
3242 struct netlink_ext_ack
*extack
)
3244 const struct flow_action_entry
*act
;
3245 bool modify_ip_header
, modify_tuple
;
3252 headers_c
= get_match_headers_criteria(actions
, spec
);
3253 headers_v
= get_match_headers_value(actions
, spec
);
3254 ethertype
= MLX5_GET(fte_match_set_lyr_2_4
, headers_v
, ethertype
);
3256 /* for non-IP we only re-write MACs, so we're okay */
3257 if (MLX5_GET(fte_match_set_lyr_2_4
, headers_c
, ip_version
) == 0 &&
3258 ethertype
!= ETH_P_IP
&& ethertype
!= ETH_P_IPV6
)
3261 modify_ip_header
= false;
3262 modify_tuple
= false;
3263 flow_action_for_each(i
, act
, flow_action
) {
3264 if (act
->id
!= FLOW_ACTION_MANGLE
&&
3265 act
->id
!= FLOW_ACTION_ADD
)
3268 err
= is_action_keys_supported(act
, ct_flow
,
3270 &modify_tuple
, extack
);
3275 if (!modify_tuple_supported(modify_tuple
, ct_clear
, ct_flow
, extack
,
3279 ip_proto
= MLX5_GET(fte_match_set_lyr_2_4
, headers_v
, ip_protocol
);
3280 if (modify_ip_header
&& ip_proto
!= IPPROTO_TCP
&&
3281 ip_proto
!= IPPROTO_UDP
&& ip_proto
!= IPPROTO_ICMP
) {
3282 NL_SET_ERR_MSG_MOD(extack
,
3283 "can't offload re-write of non TCP/UDP");
3284 netdev_info(priv
->netdev
, "can't offload re-write of ip proto %d\n",
3293 static bool actions_match_supported(struct mlx5e_priv
*priv
,
3294 struct flow_action
*flow_action
,
3295 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
3296 struct mlx5e_tc_flow
*flow
,
3297 struct netlink_ext_ack
*extack
)
3299 bool ct_flow
= false, ct_clear
= false;
3302 ct_clear
= flow
->attr
->ct_attr
.ct_action
&
3304 ct_flow
= flow_flag_test(flow
, CT
) && !ct_clear
;
3305 actions
= flow
->attr
->action
;
3307 if (mlx5e_is_eswitch_flow(flow
)) {
3308 if (flow
->attr
->esw_attr
->split_count
&& ct_flow
&&
3309 !MLX5_CAP_GEN(flow
->attr
->esw_attr
->in_mdev
, reg_c_preserve
)) {
3310 /* All registers used by ct are cleared when using
3313 NL_SET_ERR_MSG_MOD(extack
,
3314 "Can't offload mirroring with action ct");
3319 if (actions
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)
3320 return modify_header_match_supported(priv
, &parse_attr
->spec
,
3321 flow_action
, actions
,
3328 static bool same_port_devs(struct mlx5e_priv
*priv
, struct mlx5e_priv
*peer_priv
)
3330 return priv
->mdev
== peer_priv
->mdev
;
3333 static bool same_hw_devs(struct mlx5e_priv
*priv
, struct mlx5e_priv
*peer_priv
)
3335 struct mlx5_core_dev
*fmdev
, *pmdev
;
3336 u64 fsystem_guid
, psystem_guid
;
3339 pmdev
= peer_priv
->mdev
;
3341 fsystem_guid
= mlx5_query_nic_system_image_guid(fmdev
);
3342 psystem_guid
= mlx5_query_nic_system_image_guid(pmdev
);
3344 return (fsystem_guid
== psystem_guid
);
3347 static bool same_vf_reps(struct mlx5e_priv
*priv
,
3348 struct net_device
*out_dev
)
3350 return mlx5e_eswitch_vf_rep(priv
->netdev
) &&
3351 priv
->netdev
== out_dev
;
3354 static int add_vlan_rewrite_action(struct mlx5e_priv
*priv
, int namespace,
3355 const struct flow_action_entry
*act
,
3356 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
3357 struct pedit_headers_action
*hdrs
,
3358 u32
*action
, struct netlink_ext_ack
*extack
)
3360 u16 mask16
= VLAN_VID_MASK
;
3361 u16 val16
= act
->vlan
.vid
& VLAN_VID_MASK
;
3362 const struct flow_action_entry pedit_act
= {
3363 .id
= FLOW_ACTION_MANGLE
,
3364 .mangle
.htype
= FLOW_ACT_MANGLE_HDR_TYPE_ETH
,
3365 .mangle
.offset
= offsetof(struct vlan_ethhdr
, h_vlan_TCI
),
3366 .mangle
.mask
= ~(u32
)be16_to_cpu(*(__be16
*)&mask16
),
3367 .mangle
.val
= (u32
)be16_to_cpu(*(__be16
*)&val16
),
3369 u8 match_prio_mask
, match_prio_val
;
3370 void *headers_c
, *headers_v
;
3373 headers_c
= get_match_headers_criteria(*action
, &parse_attr
->spec
);
3374 headers_v
= get_match_headers_value(*action
, &parse_attr
->spec
);
3376 if (!(MLX5_GET(fte_match_set_lyr_2_4
, headers_c
, cvlan_tag
) &&
3377 MLX5_GET(fte_match_set_lyr_2_4
, headers_v
, cvlan_tag
))) {
3378 NL_SET_ERR_MSG_MOD(extack
,
3379 "VLAN rewrite action must have VLAN protocol match");
3383 match_prio_mask
= MLX5_GET(fte_match_set_lyr_2_4
, headers_c
, first_prio
);
3384 match_prio_val
= MLX5_GET(fte_match_set_lyr_2_4
, headers_v
, first_prio
);
3385 if (act
->vlan
.prio
!= (match_prio_val
& match_prio_mask
)) {
3386 NL_SET_ERR_MSG_MOD(extack
,
3387 "Changing VLAN prio is not supported");
3391 err
= parse_tc_pedit_action(priv
, &pedit_act
, namespace, parse_attr
, hdrs
, NULL
, extack
);
3392 *action
|= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
;
3398 add_vlan_prio_tag_rewrite_action(struct mlx5e_priv
*priv
,
3399 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
3400 struct pedit_headers_action
*hdrs
,
3401 u32
*action
, struct netlink_ext_ack
*extack
)
3403 const struct flow_action_entry prio_tag_act
= {
3406 MLX5_GET(fte_match_set_lyr_2_4
,
3407 get_match_headers_value(*action
,
3410 MLX5_GET(fte_match_set_lyr_2_4
,
3411 get_match_headers_criteria(*action
,
3416 return add_vlan_rewrite_action(priv
, MLX5_FLOW_NAMESPACE_FDB
,
3417 &prio_tag_act
, parse_attr
, hdrs
, action
,
3421 static int validate_goto_chain(struct mlx5e_priv
*priv
,
3422 struct mlx5e_tc_flow
*flow
,
3423 const struct flow_action_entry
*act
,
3425 struct netlink_ext_ack
*extack
)
3427 bool is_esw
= mlx5e_is_eswitch_flow(flow
);
3428 struct mlx5_flow_attr
*attr
= flow
->attr
;
3429 bool ft_flow
= mlx5e_is_ft_flow(flow
);
3430 u32 dest_chain
= act
->chain_index
;
3431 struct mlx5_fs_chains
*chains
;
3432 struct mlx5_eswitch
*esw
;
3433 u32 reformat_and_fwd
;
3436 esw
= priv
->mdev
->priv
.eswitch
;
3437 chains
= is_esw
? esw_chains(esw
) : nic_chains(priv
);
3438 max_chain
= mlx5_chains_get_chain_range(chains
);
3439 reformat_and_fwd
= is_esw
?
3440 MLX5_CAP_ESW_FLOWTABLE_FDB(priv
->mdev
, reformat_and_fwd_to_table
) :
3441 MLX5_CAP_FLOWTABLE_NIC_RX(priv
->mdev
, reformat_and_fwd_to_table
);
3444 NL_SET_ERR_MSG_MOD(extack
, "Goto action is not supported");
3448 if (!mlx5_chains_backwards_supported(chains
) &&
3449 dest_chain
<= attr
->chain
) {
3450 NL_SET_ERR_MSG_MOD(extack
,
3451 "Goto lower numbered chain isn't supported");
3455 if (dest_chain
> max_chain
) {
3456 NL_SET_ERR_MSG_MOD(extack
,
3457 "Requested destination chain is out of supported range");
3461 if (actions
& (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT
|
3462 MLX5_FLOW_CONTEXT_ACTION_DECAP
) &&
3463 !reformat_and_fwd
) {
3464 NL_SET_ERR_MSG_MOD(extack
,
3465 "Goto chain is not allowed if action has reformat or decap");
3472 static int parse_tc_nic_actions(struct mlx5e_priv
*priv
,
3473 struct flow_action
*flow_action
,
3474 struct mlx5e_tc_flow
*flow
,
3475 struct netlink_ext_ack
*extack
)
3477 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
3478 struct mlx5_flow_attr
*attr
= flow
->attr
;
3479 struct pedit_headers_action hdrs
[2] = {};
3480 const struct flow_action_entry
*act
;
3481 struct mlx5_nic_flow_attr
*nic_attr
;
3485 if (!flow_action_has_entries(flow_action
))
3488 if (!flow_action_hw_stats_check(flow_action
, extack
,
3489 FLOW_ACTION_HW_STATS_DELAYED_BIT
))
3492 nic_attr
= attr
->nic_attr
;
3493 nic_attr
->flow_tag
= MLX5_FS_DEFAULT_FLOW_TAG
;
3494 parse_attr
= attr
->parse_attr
;
3496 flow_action_for_each(i
, act
, flow_action
) {
3498 case FLOW_ACTION_ACCEPT
:
3499 action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
|
3500 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
3502 case FLOW_ACTION_DROP
:
3503 action
|= MLX5_FLOW_CONTEXT_ACTION_DROP
|
3504 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
3506 case FLOW_ACTION_MANGLE
:
3507 case FLOW_ACTION_ADD
:
3508 err
= parse_tc_pedit_action(priv
, act
, MLX5_FLOW_NAMESPACE_KERNEL
,
3509 parse_attr
, hdrs
, NULL
, extack
);
3513 action
|= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
;
3515 case FLOW_ACTION_VLAN_MANGLE
:
3516 err
= add_vlan_rewrite_action(priv
,
3517 MLX5_FLOW_NAMESPACE_KERNEL
,
3518 act
, parse_attr
, hdrs
,
3524 case FLOW_ACTION_CSUM
:
3525 if (csum_offload_supported(priv
, action
,
3531 case FLOW_ACTION_REDIRECT
: {
3532 struct net_device
*peer_dev
= act
->dev
;
3534 if (priv
->netdev
->netdev_ops
== peer_dev
->netdev_ops
&&
3535 same_hw_devs(priv
, netdev_priv(peer_dev
))) {
3536 parse_attr
->mirred_ifindex
[0] = peer_dev
->ifindex
;
3537 flow_flag_set(flow
, HAIRPIN
);
3538 action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
|
3539 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
3541 NL_SET_ERR_MSG_MOD(extack
,
3542 "device is not on same HW, can't offload");
3543 netdev_warn(priv
->netdev
, "device %s not on same HW, can't offload\n",
3549 case FLOW_ACTION_MARK
: {
3550 u32 mark
= act
->mark
;
3552 if (mark
& ~MLX5E_TC_FLOW_ID_MASK
) {
3553 NL_SET_ERR_MSG_MOD(extack
,
3554 "Bad flow mark - only 16 bit is supported");
3558 nic_attr
->flow_tag
= mark
;
3559 action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
3562 case FLOW_ACTION_GOTO
:
3563 err
= validate_goto_chain(priv
, flow
, act
, action
,
3568 action
|= MLX5_FLOW_CONTEXT_ACTION_COUNT
;
3569 attr
->dest_chain
= act
->chain_index
;
3571 case FLOW_ACTION_CT
:
3572 err
= mlx5_tc_ct_parse_action(get_ct_priv(priv
), attr
,
3573 &parse_attr
->mod_hdr_acts
,
3578 flow_flag_set(flow
, CT
);
3581 NL_SET_ERR_MSG_MOD(extack
, "The offload action is not supported");
3586 if (hdrs
[TCA_PEDIT_KEY_EX_CMD_SET
].pedits
||
3587 hdrs
[TCA_PEDIT_KEY_EX_CMD_ADD
].pedits
) {
3588 err
= alloc_tc_pedit_action(priv
, MLX5_FLOW_NAMESPACE_KERNEL
,
3589 parse_attr
, hdrs
, &action
, extack
);
3592 /* in case all pedit actions are skipped, remove the MOD_HDR
3595 if (parse_attr
->mod_hdr_acts
.num_actions
== 0) {
3596 action
&= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
;
3597 dealloc_mod_hdr_actions(&parse_attr
->mod_hdr_acts
);
3601 attr
->action
= action
;
3603 if (attr
->dest_chain
) {
3604 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
) {
3605 NL_SET_ERR_MSG(extack
, "Mirroring goto chain rules isn't supported");
3608 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
3611 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)
3612 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
3614 if (!actions_match_supported(priv
, flow_action
, parse_attr
, flow
, extack
))
3620 static bool is_merged_eswitch_vfs(struct mlx5e_priv
*priv
,
3621 struct net_device
*peer_netdev
)
3623 struct mlx5e_priv
*peer_priv
;
3625 peer_priv
= netdev_priv(peer_netdev
);
3627 return (MLX5_CAP_ESW(priv
->mdev
, merged_eswitch
) &&
3628 mlx5e_eswitch_vf_rep(priv
->netdev
) &&
3629 mlx5e_eswitch_vf_rep(peer_netdev
) &&
3630 same_hw_devs(priv
, peer_priv
));
3633 static int parse_tc_vlan_action(struct mlx5e_priv
*priv
,
3634 const struct flow_action_entry
*act
,
3635 struct mlx5_esw_flow_attr
*attr
,
3638 u8 vlan_idx
= attr
->total_vlan
;
3640 if (vlan_idx
>= MLX5_FS_VLAN_DEPTH
)
3644 case FLOW_ACTION_VLAN_POP
:
3646 if (!mlx5_eswitch_vlan_actions_supported(priv
->mdev
,
3647 MLX5_FS_VLAN_DEPTH
))
3650 *action
|= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2
;
3652 *action
|= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
;
3655 case FLOW_ACTION_VLAN_PUSH
:
3656 attr
->vlan_vid
[vlan_idx
] = act
->vlan
.vid
;
3657 attr
->vlan_prio
[vlan_idx
] = act
->vlan
.prio
;
3658 attr
->vlan_proto
[vlan_idx
] = act
->vlan
.proto
;
3659 if (!attr
->vlan_proto
[vlan_idx
])
3660 attr
->vlan_proto
[vlan_idx
] = htons(ETH_P_8021Q
);
3663 if (!mlx5_eswitch_vlan_actions_supported(priv
->mdev
,
3664 MLX5_FS_VLAN_DEPTH
))
3667 *action
|= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2
;
3669 if (!mlx5_eswitch_vlan_actions_supported(priv
->mdev
, 1) &&
3670 (act
->vlan
.proto
!= htons(ETH_P_8021Q
) ||
3674 *action
|= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH
;
3681 attr
->total_vlan
= vlan_idx
+ 1;
3686 static struct net_device
*get_fdb_out_dev(struct net_device
*uplink_dev
,
3687 struct net_device
*out_dev
)
3689 struct net_device
*fdb_out_dev
= out_dev
;
3690 struct net_device
*uplink_upper
;
3693 uplink_upper
= netdev_master_upper_dev_get_rcu(uplink_dev
);
3694 if (uplink_upper
&& netif_is_lag_master(uplink_upper
) &&
3695 uplink_upper
== out_dev
) {
3696 fdb_out_dev
= uplink_dev
;
3697 } else if (netif_is_lag_master(out_dev
)) {
3698 fdb_out_dev
= bond_option_active_slave_get_rcu(netdev_priv(out_dev
));
3700 (!mlx5e_eswitch_rep(fdb_out_dev
) ||
3701 !netdev_port_same_parent_id(fdb_out_dev
, uplink_dev
)))
3708 static int add_vlan_push_action(struct mlx5e_priv
*priv
,
3709 struct mlx5_flow_attr
*attr
,
3710 struct net_device
**out_dev
,
3713 struct net_device
*vlan_dev
= *out_dev
;
3714 struct flow_action_entry vlan_act
= {
3715 .id
= FLOW_ACTION_VLAN_PUSH
,
3716 .vlan
.vid
= vlan_dev_vlan_id(vlan_dev
),
3717 .vlan
.proto
= vlan_dev_vlan_proto(vlan_dev
),
3722 err
= parse_tc_vlan_action(priv
, &vlan_act
, attr
->esw_attr
, action
);
3727 *out_dev
= dev_get_by_index_rcu(dev_net(vlan_dev
), dev_get_iflink(vlan_dev
));
3732 if (is_vlan_dev(*out_dev
))
3733 err
= add_vlan_push_action(priv
, attr
, out_dev
, action
);
3738 static int add_vlan_pop_action(struct mlx5e_priv
*priv
,
3739 struct mlx5_flow_attr
*attr
,
3742 struct flow_action_entry vlan_act
= {
3743 .id
= FLOW_ACTION_VLAN_POP
,
3745 int nest_level
, err
= 0;
3747 nest_level
= attr
->parse_attr
->filter_dev
->lower_level
-
3748 priv
->netdev
->lower_level
;
3749 while (nest_level
--) {
3750 err
= parse_tc_vlan_action(priv
, &vlan_act
, attr
->esw_attr
, action
);
3758 static bool same_hw_reps(struct mlx5e_priv
*priv
,
3759 struct net_device
*peer_netdev
)
3761 struct mlx5e_priv
*peer_priv
;
3763 peer_priv
= netdev_priv(peer_netdev
);
3765 return mlx5e_eswitch_rep(priv
->netdev
) &&
3766 mlx5e_eswitch_rep(peer_netdev
) &&
3767 same_hw_devs(priv
, peer_priv
);
3770 static bool is_lag_dev(struct mlx5e_priv
*priv
,
3771 struct net_device
*peer_netdev
)
3773 return ((mlx5_lag_is_sriov(priv
->mdev
) ||
3774 mlx5_lag_is_multipath(priv
->mdev
)) &&
3775 same_hw_reps(priv
, peer_netdev
));
3778 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv
*priv
,
3779 struct net_device
*out_dev
)
3781 if (is_merged_eswitch_vfs(priv
, out_dev
))
3784 if (is_lag_dev(priv
, out_dev
))
3787 return mlx5e_eswitch_rep(out_dev
) &&
3788 same_port_devs(priv
, netdev_priv(out_dev
));
3791 static bool is_duplicated_output_device(struct net_device
*dev
,
3792 struct net_device
*out_dev
,
3793 int *ifindexes
, int if_count
,
3794 struct netlink_ext_ack
*extack
)
3798 for (i
= 0; i
< if_count
; i
++) {
3799 if (ifindexes
[i
] == out_dev
->ifindex
) {
3800 NL_SET_ERR_MSG_MOD(extack
,
3801 "can't duplicate output to same device");
3802 netdev_err(dev
, "can't duplicate output to same device: %s\n",
3811 static int verify_uplink_forwarding(struct mlx5e_priv
*priv
,
3812 struct mlx5e_tc_flow
*flow
,
3813 struct net_device
*out_dev
,
3814 struct netlink_ext_ack
*extack
)
3816 struct mlx5_esw_flow_attr
*attr
= flow
->attr
->esw_attr
;
3817 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
3818 struct mlx5e_rep_priv
*rep_priv
;
3820 /* Forwarding non encapsulated traffic between
3821 * uplink ports is allowed only if
3822 * termination_table_raw_traffic cap is set.
3824 * Input vport was stored attr->in_rep.
3825 * In LAG case, *priv* is the private data of
3826 * uplink which may be not the input vport.
3828 rep_priv
= mlx5e_rep_to_rep_priv(attr
->in_rep
);
3830 if (!(mlx5e_eswitch_uplink_rep(rep_priv
->netdev
) &&
3831 mlx5e_eswitch_uplink_rep(out_dev
)))
3834 if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw
->dev
,
3835 termination_table_raw_traffic
)) {
3836 NL_SET_ERR_MSG_MOD(extack
,
3837 "devices are both uplink, can't offload forwarding");
3838 pr_err("devices %s %s are both uplink, can't offload forwarding\n",
3839 priv
->netdev
->name
, out_dev
->name
);
3841 } else if (out_dev
!= rep_priv
->netdev
) {
3842 NL_SET_ERR_MSG_MOD(extack
,
3843 "devices are not the same uplink, can't offload forwarding");
3844 pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
3845 priv
->netdev
->name
, out_dev
->name
);
3851 static int parse_tc_fdb_actions(struct mlx5e_priv
*priv
,
3852 struct flow_action
*flow_action
,
3853 struct mlx5e_tc_flow
*flow
,
3854 struct netlink_ext_ack
*extack
)
3856 struct pedit_headers_action hdrs
[2] = {};
3857 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
3858 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
3859 struct mlx5e_rep_priv
*rpriv
= priv
->ppriv
;
3860 struct mlx5e_sample_attr sample_attr
= {};
3861 const struct ip_tunnel_info
*info
= NULL
;
3862 struct mlx5_flow_attr
*attr
= flow
->attr
;
3863 int ifindexes
[MLX5_MAX_FLOW_FWD_VPORTS
];
3864 bool ft_flow
= mlx5e_is_ft_flow(flow
);
3865 const struct flow_action_entry
*act
;
3866 struct mlx5_esw_flow_attr
*esw_attr
;
3867 bool encap
= false, decap
= false;
3868 u32 action
= attr
->action
;
3869 int err
, i
, if_count
= 0;
3870 bool mpls_push
= false;
3872 if (!flow_action_has_entries(flow_action
))
3875 if (!flow_action_hw_stats_check(flow_action
, extack
,
3876 FLOW_ACTION_HW_STATS_DELAYED_BIT
))
3879 esw_attr
= attr
->esw_attr
;
3880 parse_attr
= attr
->parse_attr
;
3882 flow_action_for_each(i
, act
, flow_action
) {
3884 case FLOW_ACTION_ACCEPT
:
3885 action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
|
3886 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
3887 attr
->flags
|= MLX5_ESW_ATTR_FLAG_ACCEPT
;
3889 case FLOW_ACTION_DROP
:
3890 action
|= MLX5_FLOW_CONTEXT_ACTION_DROP
|
3891 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
3893 case FLOW_ACTION_TRAP
:
3894 if (!flow_offload_has_one_action(flow_action
)) {
3895 NL_SET_ERR_MSG_MOD(extack
,
3896 "action trap is supported as a sole action only");
3899 action
|= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
|
3900 MLX5_FLOW_CONTEXT_ACTION_COUNT
);
3901 attr
->flags
|= MLX5_ESW_ATTR_FLAG_SLOW_PATH
;
3903 case FLOW_ACTION_MPLS_PUSH
:
3904 if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv
->mdev
,
3905 reformat_l2_to_l3_tunnel
) ||
3906 act
->mpls_push
.proto
!= htons(ETH_P_MPLS_UC
)) {
3907 NL_SET_ERR_MSG_MOD(extack
,
3908 "mpls push is supported only for mpls_uc protocol");
3913 case FLOW_ACTION_MPLS_POP
:
3914 /* we only support mpls pop if it is the first action
3915 * and the filter net device is bareudp. Subsequent
3916 * actions can be pedit and the last can be mirred
3920 NL_SET_ERR_MSG_MOD(extack
,
3921 "mpls pop supported only as first action");
3924 if (!netif_is_bareudp(parse_attr
->filter_dev
)) {
3925 NL_SET_ERR_MSG_MOD(extack
,
3926 "mpls pop supported only on bareudp devices");
3930 parse_attr
->eth
.h_proto
= act
->mpls_pop
.proto
;
3931 action
|= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT
;
3932 flow_flag_set(flow
, L3_TO_L2_DECAP
);
3934 case FLOW_ACTION_MANGLE
:
3935 case FLOW_ACTION_ADD
:
3936 err
= parse_tc_pedit_action(priv
, act
, MLX5_FLOW_NAMESPACE_FDB
,
3937 parse_attr
, hdrs
, flow
, extack
);
3941 if (!flow_flag_test(flow
, L3_TO_L2_DECAP
)) {
3942 action
|= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
;
3943 esw_attr
->split_count
= esw_attr
->out_count
;
3946 case FLOW_ACTION_CSUM
:
3947 if (csum_offload_supported(priv
, action
,
3948 act
->csum_flags
, extack
))
3952 case FLOW_ACTION_REDIRECT
:
3953 case FLOW_ACTION_MIRRED
: {
3954 struct mlx5e_priv
*out_priv
;
3955 struct net_device
*out_dev
;
3959 /* out_dev is NULL when filters with
3960 * non-existing mirred device are replayed to
3966 if (mpls_push
&& !netif_is_bareudp(out_dev
)) {
3967 NL_SET_ERR_MSG_MOD(extack
,
3968 "mpls is supported only through a bareudp device");
3972 if (ft_flow
&& out_dev
== priv
->netdev
) {
3973 /* Ignore forward to self rules generated
3974 * by adding both mlx5 devs to the flow table
3975 * block on a normal nft offload setup.
3980 if (esw_attr
->out_count
>= MLX5_MAX_FLOW_FWD_VPORTS
) {
3981 NL_SET_ERR_MSG_MOD(extack
,
3982 "can't support more output ports, can't offload forwarding");
3983 netdev_warn(priv
->netdev
,
3984 "can't support more than %d output ports, can't offload forwarding\n",
3985 esw_attr
->out_count
);
3989 action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
|
3990 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
3992 parse_attr
->mirred_ifindex
[esw_attr
->out_count
] =
3994 parse_attr
->tun_info
[esw_attr
->out_count
] =
3995 mlx5e_dup_tun_info(info
);
3996 if (!parse_attr
->tun_info
[esw_attr
->out_count
])
3999 esw_attr
->dests
[esw_attr
->out_count
].flags
|=
4000 MLX5_ESW_DEST_ENCAP
;
4001 esw_attr
->out_count
++;
4002 /* attr->dests[].rep is resolved when we
4005 } else if (netdev_port_same_parent_id(priv
->netdev
, out_dev
)) {
4006 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
4007 struct net_device
*uplink_dev
= mlx5_eswitch_uplink_get_proto_dev(esw
, REP_ETH
);
4009 if (is_duplicated_output_device(priv
->netdev
,
4016 ifindexes
[if_count
] = out_dev
->ifindex
;
4019 out_dev
= get_fdb_out_dev(uplink_dev
, out_dev
);
4023 if (is_vlan_dev(out_dev
)) {
4024 err
= add_vlan_push_action(priv
, attr
,
4031 if (is_vlan_dev(parse_attr
->filter_dev
)) {
4032 err
= add_vlan_pop_action(priv
, attr
,
4038 err
= verify_uplink_forwarding(priv
, flow
, out_dev
, extack
);
4042 if (!mlx5e_is_valid_eswitch_fwd_dev(priv
, out_dev
)) {
4043 NL_SET_ERR_MSG_MOD(extack
,
4044 "devices are not on same switch HW, can't offload forwarding");
4048 if (same_vf_reps(priv
, out_dev
)) {
4049 NL_SET_ERR_MSG_MOD(extack
,
4050 "can't forward from a VF to itself");
4054 out_priv
= netdev_priv(out_dev
);
4055 rpriv
= out_priv
->ppriv
;
4056 esw_attr
->dests
[esw_attr
->out_count
].rep
= rpriv
->rep
;
4057 esw_attr
->dests
[esw_attr
->out_count
].mdev
= out_priv
->mdev
;
4058 esw_attr
->out_count
++;
4059 } else if (parse_attr
->filter_dev
!= priv
->netdev
) {
4060 /* All mlx5 devices are called to configure
4061 * high level device filters. Therefore, the
4062 * *attempt* to install a filter on invalid
4063 * eswitch should not trigger an explicit error
4067 NL_SET_ERR_MSG_MOD(extack
,
4068 "devices are not on same switch HW, can't offload forwarding");
4069 netdev_warn(priv
->netdev
,
4070 "devices %s %s not on same switch HW, can't offload forwarding\n",
4077 case FLOW_ACTION_TUNNEL_ENCAP
:
4085 case FLOW_ACTION_VLAN_PUSH
:
4086 case FLOW_ACTION_VLAN_POP
:
4087 if (act
->id
== FLOW_ACTION_VLAN_PUSH
&&
4088 (action
& MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
)) {
4089 /* Replace vlan pop+push with vlan modify */
4090 action
&= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
;
4091 err
= add_vlan_rewrite_action(priv
,
4092 MLX5_FLOW_NAMESPACE_FDB
,
4093 act
, parse_attr
, hdrs
,
4096 err
= parse_tc_vlan_action(priv
, act
, esw_attr
, &action
);
4101 esw_attr
->split_count
= esw_attr
->out_count
;
4103 case FLOW_ACTION_VLAN_MANGLE
:
4104 err
= add_vlan_rewrite_action(priv
,
4105 MLX5_FLOW_NAMESPACE_FDB
,
4106 act
, parse_attr
, hdrs
,
4111 esw_attr
->split_count
= esw_attr
->out_count
;
4113 case FLOW_ACTION_TUNNEL_DECAP
:
4116 case FLOW_ACTION_GOTO
:
4117 err
= validate_goto_chain(priv
, flow
, act
, action
,
4122 action
|= MLX5_FLOW_CONTEXT_ACTION_COUNT
;
4123 attr
->dest_chain
= act
->chain_index
;
4125 case FLOW_ACTION_CT
:
4126 if (flow_flag_test(flow
, SAMPLE
)) {
4127 NL_SET_ERR_MSG_MOD(extack
, "Sample action with connection tracking is not supported");
4130 err
= mlx5_tc_ct_parse_action(get_ct_priv(priv
), attr
,
4131 &parse_attr
->mod_hdr_acts
,
4136 flow_flag_set(flow
, CT
);
4137 esw_attr
->split_count
= esw_attr
->out_count
;
4139 case FLOW_ACTION_SAMPLE
:
4140 if (flow_flag_test(flow
, CT
)) {
4141 NL_SET_ERR_MSG_MOD(extack
, "Sample action with connection tracking is not supported");
4144 sample_attr
.rate
= act
->sample
.rate
;
4145 sample_attr
.group_num
= act
->sample
.psample_group
->group_num
;
4146 if (act
->sample
.truncate
)
4147 sample_attr
.trunc_size
= act
->sample
.trunc_size
;
4148 flow_flag_set(flow
, SAMPLE
);
4151 NL_SET_ERR_MSG_MOD(extack
, "The offload action is not supported");
4156 /* always set IP version for indirect table handling */
4157 attr
->ip_version
= mlx5e_tc_get_ip_version(&parse_attr
->spec
, true);
4159 if (MLX5_CAP_GEN(esw
->dev
, prio_tag_required
) &&
4160 action
& MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
) {
4161 /* For prio tag mode, replace vlan pop with rewrite vlan prio
4164 action
&= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
;
4165 err
= add_vlan_prio_tag_rewrite_action(priv
, parse_attr
, hdrs
,
4171 if (hdrs
[TCA_PEDIT_KEY_EX_CMD_SET
].pedits
||
4172 hdrs
[TCA_PEDIT_KEY_EX_CMD_ADD
].pedits
) {
4173 err
= alloc_tc_pedit_action(priv
, MLX5_FLOW_NAMESPACE_FDB
,
4174 parse_attr
, hdrs
, &action
, extack
);
4177 /* in case all pedit actions are skipped, remove the MOD_HDR
4178 * flag. we might have set split_count either by pedit or
4179 * pop/push. if there is no pop/push either, reset it too.
4181 if (parse_attr
->mod_hdr_acts
.num_actions
== 0) {
4182 action
&= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
;
4183 dealloc_mod_hdr_actions(&parse_attr
->mod_hdr_acts
);
4184 if (!((action
& MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
) ||
4185 (action
& MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH
)))
4186 esw_attr
->split_count
= 0;
4190 attr
->action
= action
;
4191 if (!actions_match_supported(priv
, flow_action
, parse_attr
, flow
, extack
))
4194 if (attr
->dest_chain
) {
4196 /* It can be supported if we'll create a mapping for
4197 * the tunnel device only (without tunnel), and set
4198 * this tunnel id with this decap flow.
4200 * On restore (miss), we'll just set this saved tunnel
4204 NL_SET_ERR_MSG(extack
,
4205 "Decap with goto isn't supported");
4206 netdev_warn(priv
->netdev
,
4207 "Decap with goto isn't supported");
4211 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
4214 if (!(attr
->action
&
4215 (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
| MLX5_FLOW_CONTEXT_ACTION_DROP
))) {
4216 NL_SET_ERR_MSG_MOD(extack
,
4217 "Rule must have at least one forward/drop action");
4221 if (esw_attr
->split_count
> 0 && !mlx5_esw_has_fwd_fdb(priv
->mdev
)) {
4222 NL_SET_ERR_MSG_MOD(extack
,
4223 "current firmware doesn't support split rule for port mirroring");
4224 netdev_warn_once(priv
->netdev
, "current firmware doesn't support split rule for port mirroring\n");
4228 /* Allocate sample attribute only when there is a sample action and
4229 * no errors after parsing.
4231 if (flow_flag_test(flow
, SAMPLE
)) {
4232 attr
->sample_attr
= kzalloc(sizeof(*attr
->sample_attr
), GFP_KERNEL
);
4233 if (!attr
->sample_attr
)
4235 *attr
->sample_attr
= sample_attr
;
4241 static void get_flags(int flags
, unsigned long *flow_flags
)
4243 unsigned long __flow_flags
= 0;
4245 if (flags
& MLX5_TC_FLAG(INGRESS
))
4246 __flow_flags
|= BIT(MLX5E_TC_FLOW_FLAG_INGRESS
);
4247 if (flags
& MLX5_TC_FLAG(EGRESS
))
4248 __flow_flags
|= BIT(MLX5E_TC_FLOW_FLAG_EGRESS
);
4250 if (flags
& MLX5_TC_FLAG(ESW_OFFLOAD
))
4251 __flow_flags
|= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH
);
4252 if (flags
& MLX5_TC_FLAG(NIC_OFFLOAD
))
4253 __flow_flags
|= BIT(MLX5E_TC_FLOW_FLAG_NIC
);
4254 if (flags
& MLX5_TC_FLAG(FT_OFFLOAD
))
4255 __flow_flags
|= BIT(MLX5E_TC_FLOW_FLAG_FT
);
4257 *flow_flags
= __flow_flags
;
4260 static const struct rhashtable_params tc_ht_params
= {
4261 .head_offset
= offsetof(struct mlx5e_tc_flow
, node
),
4262 .key_offset
= offsetof(struct mlx5e_tc_flow
, cookie
),
4263 .key_len
= sizeof(((struct mlx5e_tc_flow
*)0)->cookie
),
4264 .automatic_shrinking
= true,
4267 static struct rhashtable
*get_tc_ht(struct mlx5e_priv
*priv
,
4268 unsigned long flags
)
4270 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
4271 struct mlx5e_rep_priv
*uplink_rpriv
;
4273 if (flags
& MLX5_TC_FLAG(ESW_OFFLOAD
)) {
4274 uplink_rpriv
= mlx5_eswitch_get_uplink_priv(esw
, REP_ETH
);
4275 return &uplink_rpriv
->uplink_priv
.tc_ht
;
4276 } else /* NIC offload */
4277 return &priv
->fs
.tc
.ht
;
4280 static bool is_peer_flow_needed(struct mlx5e_tc_flow
*flow
)
4282 struct mlx5_esw_flow_attr
*esw_attr
= flow
->attr
->esw_attr
;
4283 struct mlx5_flow_attr
*attr
= flow
->attr
;
4284 bool is_rep_ingress
= esw_attr
->in_rep
->vport
!= MLX5_VPORT_UPLINK
&&
4285 flow_flag_test(flow
, INGRESS
);
4286 bool act_is_encap
= !!(attr
->action
&
4287 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT
);
4288 bool esw_paired
= mlx5_devcom_is_paired(esw_attr
->in_mdev
->priv
.devcom
,
4289 MLX5_DEVCOM_ESW_OFFLOADS
);
4294 if ((mlx5_lag_is_sriov(esw_attr
->in_mdev
) ||
4295 mlx5_lag_is_multipath(esw_attr
->in_mdev
)) &&
4296 (is_rep_ingress
|| act_is_encap
))
4302 struct mlx5_flow_attr
*
4303 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type
)
4305 u32 ex_attr_size
= (type
== MLX5_FLOW_NAMESPACE_FDB
) ?
4306 sizeof(struct mlx5_esw_flow_attr
) :
4307 sizeof(struct mlx5_nic_flow_attr
);
4308 struct mlx5_flow_attr
*attr
;
4310 return kzalloc(sizeof(*attr
) + ex_attr_size
, GFP_KERNEL
);
4314 mlx5e_alloc_flow(struct mlx5e_priv
*priv
, int attr_size
,
4315 struct flow_cls_offload
*f
, unsigned long flow_flags
,
4316 struct mlx5e_tc_flow_parse_attr
**__parse_attr
,
4317 struct mlx5e_tc_flow
**__flow
)
4319 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
4320 struct mlx5_flow_attr
*attr
;
4321 struct mlx5e_tc_flow
*flow
;
4325 flow
= kzalloc(sizeof(*flow
), GFP_KERNEL
);
4326 parse_attr
= kvzalloc(sizeof(*parse_attr
), GFP_KERNEL
);
4327 if (!parse_attr
|| !flow
)
4330 flow
->flags
= flow_flags
;
4331 flow
->cookie
= f
->cookie
;
4334 attr
= mlx5_alloc_flow_attr(get_flow_name_space(flow
));
4340 for (out_index
= 0; out_index
< MLX5_MAX_FLOW_FWD_VPORTS
; out_index
++)
4341 INIT_LIST_HEAD(&flow
->encaps
[out_index
].list
);
4342 INIT_LIST_HEAD(&flow
->hairpin
);
4343 INIT_LIST_HEAD(&flow
->l3_to_l2_reformat
);
4344 refcount_set(&flow
->refcnt
, 1);
4345 init_completion(&flow
->init_done
);
4346 init_completion(&flow
->del_hw_done
);
4349 *__parse_attr
= parse_attr
;
4360 mlx5e_flow_attr_init(struct mlx5_flow_attr
*attr
,
4361 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
4362 struct flow_cls_offload
*f
)
4364 attr
->parse_attr
= parse_attr
;
4365 attr
->chain
= f
->common
.chain_index
;
4366 attr
->prio
= f
->common
.prio
;
4370 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr
*attr
,
4371 struct mlx5e_priv
*priv
,
4372 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
4373 struct flow_cls_offload
*f
,
4374 struct mlx5_eswitch_rep
*in_rep
,
4375 struct mlx5_core_dev
*in_mdev
)
4377 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
4378 struct mlx5_esw_flow_attr
*esw_attr
= attr
->esw_attr
;
4380 mlx5e_flow_attr_init(attr
, parse_attr
, f
);
4382 esw_attr
->in_rep
= in_rep
;
4383 esw_attr
->in_mdev
= in_mdev
;
4385 if (MLX5_CAP_ESW(esw
->dev
, counter_eswitch_affinity
) ==
4386 MLX5_COUNTER_SOURCE_ESWITCH
)
4387 esw_attr
->counter_dev
= in_mdev
;
4389 esw_attr
->counter_dev
= priv
->mdev
;
4392 static struct mlx5e_tc_flow
*
4393 __mlx5e_add_fdb_flow(struct mlx5e_priv
*priv
,
4394 struct flow_cls_offload
*f
,
4395 unsigned long flow_flags
,
4396 struct net_device
*filter_dev
,
4397 struct mlx5_eswitch_rep
*in_rep
,
4398 struct mlx5_core_dev
*in_mdev
)
4400 struct flow_rule
*rule
= flow_cls_offload_flow_rule(f
);
4401 struct netlink_ext_ack
*extack
= f
->common
.extack
;
4402 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
4403 struct mlx5e_tc_flow
*flow
;
4406 flow_flags
|= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH
);
4407 attr_size
= sizeof(struct mlx5_esw_flow_attr
);
4408 err
= mlx5e_alloc_flow(priv
, attr_size
, f
, flow_flags
,
4409 &parse_attr
, &flow
);
4413 parse_attr
->filter_dev
= filter_dev
;
4414 mlx5e_flow_esw_attr_init(flow
->attr
,
4416 f
, in_rep
, in_mdev
);
4418 err
= parse_cls_flower(flow
->priv
, flow
, &parse_attr
->spec
,
4423 /* actions validation depends on parsing the ct matches first */
4424 err
= mlx5_tc_ct_match_add(get_ct_priv(priv
), &parse_attr
->spec
, f
,
4425 &flow
->attr
->ct_attr
, extack
);
4429 err
= parse_tc_fdb_actions(priv
, &rule
->action
, flow
, extack
);
4433 err
= mlx5e_tc_add_fdb_flow(priv
, flow
, extack
);
4434 complete_all(&flow
->init_done
);
4436 if (!(err
== -ENETUNREACH
&& mlx5_lag_is_multipath(in_mdev
)))
4439 add_unready_flow(flow
);
4445 mlx5e_flow_put(priv
, flow
);
4447 return ERR_PTR(err
);
4450 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload
*f
,
4451 struct mlx5e_tc_flow
*flow
,
4452 unsigned long flow_flags
)
4454 struct mlx5e_priv
*priv
= flow
->priv
, *peer_priv
;
4455 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
, *peer_esw
;
4456 struct mlx5_esw_flow_attr
*attr
= flow
->attr
->esw_attr
;
4457 struct mlx5_devcom
*devcom
= priv
->mdev
->priv
.devcom
;
4458 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
4459 struct mlx5e_rep_priv
*peer_urpriv
;
4460 struct mlx5e_tc_flow
*peer_flow
;
4461 struct mlx5_core_dev
*in_mdev
;
4464 peer_esw
= mlx5_devcom_get_peer_data(devcom
, MLX5_DEVCOM_ESW_OFFLOADS
);
4468 peer_urpriv
= mlx5_eswitch_get_uplink_priv(peer_esw
, REP_ETH
);
4469 peer_priv
= netdev_priv(peer_urpriv
->netdev
);
4471 /* in_mdev is assigned of which the packet originated from.
4472 * So packets redirected to uplink use the same mdev of the
4473 * original flow and packets redirected from uplink use the
4476 if (attr
->in_rep
->vport
== MLX5_VPORT_UPLINK
)
4477 in_mdev
= peer_priv
->mdev
;
4479 in_mdev
= priv
->mdev
;
4481 parse_attr
= flow
->attr
->parse_attr
;
4482 peer_flow
= __mlx5e_add_fdb_flow(peer_priv
, f
, flow_flags
,
4483 parse_attr
->filter_dev
,
4484 attr
->in_rep
, in_mdev
);
4485 if (IS_ERR(peer_flow
)) {
4486 err
= PTR_ERR(peer_flow
);
4490 flow
->peer_flow
= peer_flow
;
4491 flow_flag_set(flow
, DUP
);
4492 mutex_lock(&esw
->offloads
.peer_mutex
);
4493 list_add_tail(&flow
->peer
, &esw
->offloads
.peer_flows
);
4494 mutex_unlock(&esw
->offloads
.peer_mutex
);
4497 mlx5_devcom_release_peer_data(devcom
, MLX5_DEVCOM_ESW_OFFLOADS
);
4502 mlx5e_add_fdb_flow(struct mlx5e_priv
*priv
,
4503 struct flow_cls_offload
*f
,
4504 unsigned long flow_flags
,
4505 struct net_device
*filter_dev
,
4506 struct mlx5e_tc_flow
**__flow
)
4508 struct mlx5e_rep_priv
*rpriv
= priv
->ppriv
;
4509 struct mlx5_eswitch_rep
*in_rep
= rpriv
->rep
;
4510 struct mlx5_core_dev
*in_mdev
= priv
->mdev
;
4511 struct mlx5e_tc_flow
*flow
;
4514 flow
= __mlx5e_add_fdb_flow(priv
, f
, flow_flags
, filter_dev
, in_rep
,
4517 return PTR_ERR(flow
);
4519 if (is_peer_flow_needed(flow
)) {
4520 err
= mlx5e_tc_add_fdb_peer_flow(f
, flow
, flow_flags
);
4522 mlx5e_tc_del_fdb_flow(priv
, flow
);
4536 mlx5e_add_nic_flow(struct mlx5e_priv
*priv
,
4537 struct flow_cls_offload
*f
,
4538 unsigned long flow_flags
,
4539 struct net_device
*filter_dev
,
4540 struct mlx5e_tc_flow
**__flow
)
4542 struct flow_rule
*rule
= flow_cls_offload_flow_rule(f
);
4543 struct netlink_ext_ack
*extack
= f
->common
.extack
;
4544 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
4545 struct mlx5e_tc_flow
*flow
;
4548 if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv
->mdev
, ignore_flow_level
)) {
4549 if (!tc_cls_can_offload_and_chain0(priv
->netdev
, &f
->common
))
4551 } else if (!tc_can_offload_extack(priv
->netdev
, f
->common
.extack
)) {
4555 flow_flags
|= BIT(MLX5E_TC_FLOW_FLAG_NIC
);
4556 attr_size
= sizeof(struct mlx5_nic_flow_attr
);
4557 err
= mlx5e_alloc_flow(priv
, attr_size
, f
, flow_flags
,
4558 &parse_attr
, &flow
);
4562 parse_attr
->filter_dev
= filter_dev
;
4563 mlx5e_flow_attr_init(flow
->attr
, parse_attr
, f
);
4565 err
= parse_cls_flower(flow
->priv
, flow
, &parse_attr
->spec
,
4570 err
= mlx5_tc_ct_match_add(get_ct_priv(priv
), &parse_attr
->spec
, f
,
4571 &flow
->attr
->ct_attr
, extack
);
4575 err
= parse_tc_nic_actions(priv
, &rule
->action
, flow
, extack
);
4579 err
= mlx5e_tc_add_nic_flow(priv
, flow
, extack
);
4583 flow_flag_set(flow
, OFFLOADED
);
4589 flow_flag_set(flow
, FAILED
);
4590 dealloc_mod_hdr_actions(&parse_attr
->mod_hdr_acts
);
4591 mlx5e_flow_put(priv
, flow
);
4597 mlx5e_tc_add_flow(struct mlx5e_priv
*priv
,
4598 struct flow_cls_offload
*f
,
4599 unsigned long flags
,
4600 struct net_device
*filter_dev
,
4601 struct mlx5e_tc_flow
**flow
)
4603 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
4604 unsigned long flow_flags
;
4607 get_flags(flags
, &flow_flags
);
4609 if (!tc_can_offload_extack(priv
->netdev
, f
->common
.extack
))
4612 if (esw
&& esw
->mode
== MLX5_ESWITCH_OFFLOADS
)
4613 err
= mlx5e_add_fdb_flow(priv
, f
, flow_flags
,
4616 err
= mlx5e_add_nic_flow(priv
, f
, flow_flags
,
4622 static bool is_flow_rule_duplicate_allowed(struct net_device
*dev
,
4623 struct mlx5e_rep_priv
*rpriv
)
4625 /* Offloaded flow rule is allowed to duplicate on non-uplink representor
4626 * sharing tc block with other slaves of a lag device. Rpriv can be NULL if this
4627 * function is called from NIC mode.
4629 return netif_is_lag_port(dev
) && rpriv
&& rpriv
->rep
->vport
!= MLX5_VPORT_UPLINK
;
4632 int mlx5e_configure_flower(struct net_device
*dev
, struct mlx5e_priv
*priv
,
4633 struct flow_cls_offload
*f
, unsigned long flags
)
4635 struct netlink_ext_ack
*extack
= f
->common
.extack
;
4636 struct rhashtable
*tc_ht
= get_tc_ht(priv
, flags
);
4637 struct mlx5e_rep_priv
*rpriv
= priv
->ppriv
;
4638 struct mlx5e_tc_flow
*flow
;
4641 if (!mlx5_esw_hold(priv
->mdev
))
4644 mlx5_esw_get(priv
->mdev
);
4647 flow
= rhashtable_lookup(tc_ht
, &f
->cookie
, tc_ht_params
);
4649 /* Same flow rule offloaded to non-uplink representor sharing tc block,
4652 if (is_flow_rule_duplicate_allowed(dev
, rpriv
) && flow
->orig_dev
!= dev
)
4655 NL_SET_ERR_MSG_MOD(extack
,
4656 "flow cookie already exists, ignoring");
4657 netdev_warn_once(priv
->netdev
,
4658 "flow cookie %lx already exists, ignoring\n",
4668 trace_mlx5e_configure_flower(f
);
4669 err
= mlx5e_tc_add_flow(priv
, f
, flags
, dev
, &flow
);
4673 /* Flow rule offloaded to non-uplink representor sharing tc block,
4674 * set the flow's owner dev.
4676 if (is_flow_rule_duplicate_allowed(dev
, rpriv
))
4677 flow
->orig_dev
= dev
;
4679 err
= rhashtable_lookup_insert_fast(tc_ht
, &flow
->node
, tc_ht_params
);
4683 mlx5_esw_release(priv
->mdev
);
4687 mlx5e_flow_put(priv
, flow
);
4689 mlx5_esw_put(priv
->mdev
);
4690 mlx5_esw_release(priv
->mdev
);
4694 static bool same_flow_direction(struct mlx5e_tc_flow
*flow
, int flags
)
4696 bool dir_ingress
= !!(flags
& MLX5_TC_FLAG(INGRESS
));
4697 bool dir_egress
= !!(flags
& MLX5_TC_FLAG(EGRESS
));
4699 return flow_flag_test(flow
, INGRESS
) == dir_ingress
&&
4700 flow_flag_test(flow
, EGRESS
) == dir_egress
;
4703 int mlx5e_delete_flower(struct net_device
*dev
, struct mlx5e_priv
*priv
,
4704 struct flow_cls_offload
*f
, unsigned long flags
)
4706 struct rhashtable
*tc_ht
= get_tc_ht(priv
, flags
);
4707 struct mlx5e_tc_flow
*flow
;
4711 flow
= rhashtable_lookup(tc_ht
, &f
->cookie
, tc_ht_params
);
4712 if (!flow
|| !same_flow_direction(flow
, flags
)) {
4717 /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
4720 if (flow_flag_test_and_set(flow
, DELETED
)) {
4724 rhashtable_remove_fast(tc_ht
, &flow
->node
, tc_ht_params
);
4727 trace_mlx5e_delete_flower(f
);
4728 mlx5e_flow_put(priv
, flow
);
4730 mlx5_esw_put(priv
->mdev
);
4738 int mlx5e_stats_flower(struct net_device
*dev
, struct mlx5e_priv
*priv
,
4739 struct flow_cls_offload
*f
, unsigned long flags
)
4741 struct mlx5_devcom
*devcom
= priv
->mdev
->priv
.devcom
;
4742 struct rhashtable
*tc_ht
= get_tc_ht(priv
, flags
);
4743 struct mlx5_eswitch
*peer_esw
;
4744 struct mlx5e_tc_flow
*flow
;
4745 struct mlx5_fc
*counter
;
4752 flow
= mlx5e_flow_get(rhashtable_lookup(tc_ht
, &f
->cookie
,
4756 return PTR_ERR(flow
);
4758 if (!same_flow_direction(flow
, flags
)) {
4763 if (mlx5e_is_offloaded_flow(flow
) || flow_flag_test(flow
, CT
)) {
4764 counter
= mlx5e_tc_get_counter(flow
);
4768 mlx5_fc_query_cached(counter
, &bytes
, &packets
, &lastuse
);
4771 /* Under multipath it's possible for one rule to be currently
4772 * un-offloaded while the other rule is offloaded.
4774 peer_esw
= mlx5_devcom_get_peer_data(devcom
, MLX5_DEVCOM_ESW_OFFLOADS
);
4778 if (flow_flag_test(flow
, DUP
) &&
4779 flow_flag_test(flow
->peer_flow
, OFFLOADED
)) {
4784 counter
= mlx5e_tc_get_counter(flow
->peer_flow
);
4786 goto no_peer_counter
;
4787 mlx5_fc_query_cached(counter
, &bytes2
, &packets2
, &lastuse2
);
4790 packets
+= packets2
;
4791 lastuse
= max_t(u64
, lastuse
, lastuse2
);
4795 mlx5_devcom_release_peer_data(devcom
, MLX5_DEVCOM_ESW_OFFLOADS
);
4797 flow_stats_update(&f
->stats
, bytes
, packets
, 0, lastuse
,
4798 FLOW_ACTION_HW_STATS_DELAYED
);
4799 trace_mlx5e_stats_flower(f
);
4801 mlx5e_flow_put(priv
, flow
);
4805 static int apply_police_params(struct mlx5e_priv
*priv
, u64 rate
,
4806 struct netlink_ext_ack
*extack
)
4808 struct mlx5e_rep_priv
*rpriv
= priv
->ppriv
;
4809 struct mlx5_eswitch
*esw
;
4814 vport_num
= rpriv
->rep
->vport
;
4815 if (vport_num
>= MLX5_VPORT_ECPF
) {
4816 NL_SET_ERR_MSG_MOD(extack
,
4817 "Ingress rate limit is supported only for Eswitch ports connected to VFs");
4821 esw
= priv
->mdev
->priv
.eswitch
;
4822 /* rate is given in bytes/sec.
4823 * First convert to bits/sec and then round to the nearest mbit/secs.
4824 * mbit means million bits.
4825 * Moreover, if rate is non zero we choose to configure to a minimum of
4829 rate
= (rate
* BITS_PER_BYTE
) + 500000;
4830 do_div(rate
, 1000000);
4831 rate_mbps
= max_t(u32
, rate
, 1);
4834 err
= mlx5_esw_qos_modify_vport_rate(esw
, vport_num
, rate_mbps
);
4836 NL_SET_ERR_MSG_MOD(extack
, "failed applying action to hardware");
4841 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv
*priv
,
4842 struct flow_action
*flow_action
,
4843 struct netlink_ext_ack
*extack
)
4845 struct mlx5e_rep_priv
*rpriv
= priv
->ppriv
;
4846 const struct flow_action_entry
*act
;
4850 if (!flow_action_has_entries(flow_action
)) {
4851 NL_SET_ERR_MSG_MOD(extack
, "matchall called with no action");
4855 if (!flow_offload_has_one_action(flow_action
)) {
4856 NL_SET_ERR_MSG_MOD(extack
, "matchall policing support only a single action");
4860 if (!flow_action_basic_hw_stats_check(flow_action
, extack
))
4863 flow_action_for_each(i
, act
, flow_action
) {
4865 case FLOW_ACTION_POLICE
:
4866 if (act
->police
.rate_pkt_ps
) {
4867 NL_SET_ERR_MSG_MOD(extack
, "QoS offload not support packets per second");
4870 err
= apply_police_params(priv
, act
->police
.rate_bytes_ps
, extack
);
4874 rpriv
->prev_vf_vport_stats
= priv
->stats
.vf_vport
;
4877 NL_SET_ERR_MSG_MOD(extack
, "mlx5 supports only police action for matchall");
4885 int mlx5e_tc_configure_matchall(struct mlx5e_priv
*priv
,
4886 struct tc_cls_matchall_offload
*ma
)
4888 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
4889 struct netlink_ext_ack
*extack
= ma
->common
.extack
;
4891 if (!mlx5_esw_qos_enabled(esw
)) {
4892 NL_SET_ERR_MSG_MOD(extack
, "QoS is not supported on this device");
4896 if (ma
->common
.prio
!= 1) {
4897 NL_SET_ERR_MSG_MOD(extack
, "only priority 1 is supported");
4901 return scan_tc_matchall_fdb_actions(priv
, &ma
->rule
->action
, extack
);
4904 int mlx5e_tc_delete_matchall(struct mlx5e_priv
*priv
,
4905 struct tc_cls_matchall_offload
*ma
)
4907 struct netlink_ext_ack
*extack
= ma
->common
.extack
;
4909 return apply_police_params(priv
, 0, extack
);
4912 void mlx5e_tc_stats_matchall(struct mlx5e_priv
*priv
,
4913 struct tc_cls_matchall_offload
*ma
)
4915 struct mlx5e_rep_priv
*rpriv
= priv
->ppriv
;
4916 struct rtnl_link_stats64 cur_stats
;
4920 cur_stats
= priv
->stats
.vf_vport
;
4921 dpkts
= cur_stats
.rx_packets
- rpriv
->prev_vf_vport_stats
.rx_packets
;
4922 dbytes
= cur_stats
.rx_bytes
- rpriv
->prev_vf_vport_stats
.rx_bytes
;
4923 rpriv
->prev_vf_vport_stats
= cur_stats
;
4924 flow_stats_update(&ma
->stats
, dbytes
, dpkts
, 0, jiffies
,
4925 FLOW_ACTION_HW_STATS_DELAYED
);
4928 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv
*priv
,
4929 struct mlx5e_priv
*peer_priv
)
4931 struct mlx5_core_dev
*peer_mdev
= peer_priv
->mdev
;
4932 struct mlx5e_hairpin_entry
*hpe
, *tmp
;
4933 LIST_HEAD(init_wait_list
);
4937 if (!same_hw_devs(priv
, peer_priv
))
4940 peer_vhca_id
= MLX5_CAP_GEN(peer_mdev
, vhca_id
);
4942 mutex_lock(&priv
->fs
.tc
.hairpin_tbl_lock
);
4943 hash_for_each(priv
->fs
.tc
.hairpin_tbl
, bkt
, hpe
, hairpin_hlist
)
4944 if (refcount_inc_not_zero(&hpe
->refcnt
))
4945 list_add(&hpe
->dead_peer_wait_list
, &init_wait_list
);
4946 mutex_unlock(&priv
->fs
.tc
.hairpin_tbl_lock
);
4948 list_for_each_entry_safe(hpe
, tmp
, &init_wait_list
, dead_peer_wait_list
) {
4949 wait_for_completion(&hpe
->res_ready
);
4950 if (!IS_ERR_OR_NULL(hpe
->hp
) && hpe
->peer_vhca_id
== peer_vhca_id
)
4951 mlx5_core_hairpin_clear_dead_peer(hpe
->hp
->pair
);
4953 mlx5e_hairpin_put(priv
, hpe
);
4957 static int mlx5e_tc_netdev_event(struct notifier_block
*this,
4958 unsigned long event
, void *ptr
)
4960 struct net_device
*ndev
= netdev_notifier_info_to_dev(ptr
);
4961 struct mlx5e_flow_steering
*fs
;
4962 struct mlx5e_priv
*peer_priv
;
4963 struct mlx5e_tc_table
*tc
;
4964 struct mlx5e_priv
*priv
;
4966 if (ndev
->netdev_ops
!= &mlx5e_netdev_ops
||
4967 event
!= NETDEV_UNREGISTER
||
4968 ndev
->reg_state
== NETREG_REGISTERED
)
4971 tc
= container_of(this, struct mlx5e_tc_table
, netdevice_nb
);
4972 fs
= container_of(tc
, struct mlx5e_flow_steering
, tc
);
4973 priv
= container_of(fs
, struct mlx5e_priv
, fs
);
4974 peer_priv
= netdev_priv(ndev
);
4975 if (priv
== peer_priv
||
4976 !(priv
->netdev
->features
& NETIF_F_HW_TC
))
4979 mlx5e_tc_hairpin_update_dead_peer(priv
, peer_priv
);
4984 static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev
*dev
)
4986 int tc_grp_size
, tc_tbl_size
;
4987 u32 max_flow_counter
;
4989 max_flow_counter
= (MLX5_CAP_GEN(dev
, max_flow_counter_31_16
) << 16) |
4990 MLX5_CAP_GEN(dev
, max_flow_counter_15_0
);
4992 tc_grp_size
= min_t(int, max_flow_counter
, MLX5E_TC_TABLE_MAX_GROUP_SIZE
);
4994 tc_tbl_size
= min_t(int, tc_grp_size
* MLX5E_TC_TABLE_NUM_GROUPS
,
4995 BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev
, log_max_ft_size
)));
5000 static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv
*priv
)
5002 struct mlx5_flow_table
**ft
= &priv
->fs
.tc
.miss_t
;
5003 struct mlx5_flow_table_attr ft_attr
= {};
5004 struct mlx5_flow_namespace
*ns
;
5007 ft_attr
.max_fte
= 1;
5008 ft_attr
.autogroup
.max_num_groups
= 1;
5009 ft_attr
.level
= MLX5E_TC_MISS_LEVEL
;
5011 ns
= mlx5_get_flow_namespace(priv
->mdev
, MLX5_FLOW_NAMESPACE_KERNEL
);
5013 *ft
= mlx5_create_auto_grouped_flow_table(ns
, &ft_attr
);
5016 netdev_err(priv
->netdev
, "failed to create tc nic miss table err=%d\n", err
);
5022 static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv
*priv
)
5024 mlx5_destroy_flow_table(priv
->fs
.tc
.miss_t
);
5027 int mlx5e_tc_nic_init(struct mlx5e_priv
*priv
)
5029 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
5030 struct mlx5_core_dev
*dev
= priv
->mdev
;
5031 struct mapping_ctx
*chains_mapping
;
5032 struct mlx5_chains_attr attr
= {};
5036 mlx5e_mod_hdr_tbl_init(&tc
->mod_hdr
);
5037 mutex_init(&tc
->t_lock
);
5038 mutex_init(&tc
->hairpin_tbl_lock
);
5039 hash_init(tc
->hairpin_tbl
);
5041 err
= rhashtable_init(&tc
->ht
, &tc_ht_params
);
5045 lockdep_set_class(&tc
->ht
.mutex
, &tc_ht_lock_key
);
5047 mapping_id
= mlx5_query_nic_system_image_guid(dev
);
5049 chains_mapping
= mapping_create_for_id(mapping_id
, MAPPING_TYPE_CHAIN
,
5050 sizeof(struct mlx5_mapped_obj
),
5051 MLX5E_TC_TABLE_CHAIN_TAG_MASK
, true);
5053 if (IS_ERR(chains_mapping
)) {
5054 err
= PTR_ERR(chains_mapping
);
5057 tc
->mapping
= chains_mapping
;
5059 err
= mlx5e_tc_nic_create_miss_table(priv
);
5063 if (MLX5_CAP_FLOWTABLE_NIC_RX(priv
->mdev
, ignore_flow_level
))
5064 attr
.flags
= MLX5_CHAINS_AND_PRIOS_SUPPORTED
|
5065 MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED
;
5066 attr
.ns
= MLX5_FLOW_NAMESPACE_KERNEL
;
5067 attr
.max_ft_sz
= mlx5e_tc_nic_get_ft_size(dev
);
5068 attr
.max_grp_num
= MLX5E_TC_TABLE_NUM_GROUPS
;
5069 attr
.default_ft
= priv
->fs
.tc
.miss_t
;
5070 attr
.mapping
= chains_mapping
;
5072 tc
->chains
= mlx5_chains_create(dev
, &attr
);
5073 if (IS_ERR(tc
->chains
)) {
5074 err
= PTR_ERR(tc
->chains
);
5078 tc
->post_act
= mlx5e_tc_post_act_init(priv
, tc
->chains
, MLX5_FLOW_NAMESPACE_KERNEL
);
5079 tc
->ct
= mlx5_tc_ct_init(priv
, tc
->chains
, &priv
->fs
.tc
.mod_hdr
,
5080 MLX5_FLOW_NAMESPACE_KERNEL
, tc
->post_act
);
5082 tc
->netdevice_nb
.notifier_call
= mlx5e_tc_netdev_event
;
5083 err
= register_netdevice_notifier_dev_net(priv
->netdev
,
5087 tc
->netdevice_nb
.notifier_call
= NULL
;
5088 mlx5_core_warn(priv
->mdev
, "Failed to register netdev notifier\n");
5095 mlx5_tc_ct_clean(tc
->ct
);
5096 mlx5e_tc_post_act_destroy(tc
->post_act
);
5097 mlx5_chains_destroy(tc
->chains
);
5099 mlx5e_tc_nic_destroy_miss_table(priv
);
5101 mapping_destroy(chains_mapping
);
5103 rhashtable_destroy(&tc
->ht
);
5107 static void _mlx5e_tc_del_flow(void *ptr
, void *arg
)
5109 struct mlx5e_tc_flow
*flow
= ptr
;
5110 struct mlx5e_priv
*priv
= flow
->priv
;
5112 mlx5e_tc_del_flow(priv
, flow
);
5116 void mlx5e_tc_nic_cleanup(struct mlx5e_priv
*priv
)
5118 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
5120 if (tc
->netdevice_nb
.notifier_call
)
5121 unregister_netdevice_notifier_dev_net(priv
->netdev
,
5125 mlx5e_mod_hdr_tbl_destroy(&tc
->mod_hdr
);
5126 mutex_destroy(&tc
->hairpin_tbl_lock
);
5128 rhashtable_free_and_destroy(&tc
->ht
, _mlx5e_tc_del_flow
, NULL
);
5130 if (!IS_ERR_OR_NULL(tc
->t
)) {
5131 mlx5_chains_put_table(tc
->chains
, 0, 1, MLX5E_TC_FT_LEVEL
);
5134 mutex_destroy(&tc
->t_lock
);
5136 mlx5_tc_ct_clean(tc
->ct
);
5137 mlx5e_tc_post_act_destroy(tc
->post_act
);
5138 mapping_destroy(tc
->mapping
);
5139 mlx5_chains_destroy(tc
->chains
);
5140 mlx5e_tc_nic_destroy_miss_table(priv
);
5143 int mlx5e_tc_esw_init(struct rhashtable
*tc_ht
)
5145 const size_t sz_enc_opts
= sizeof(struct tunnel_match_enc_opts
);
5146 struct mlx5_rep_uplink_priv
*uplink_priv
;
5147 struct mlx5e_rep_priv
*rpriv
;
5148 struct mapping_ctx
*mapping
;
5149 struct mlx5_eswitch
*esw
;
5150 struct mlx5e_priv
*priv
;
5154 uplink_priv
= container_of(tc_ht
, struct mlx5_rep_uplink_priv
, tc_ht
);
5155 rpriv
= container_of(uplink_priv
, struct mlx5e_rep_priv
, uplink_priv
);
5156 priv
= netdev_priv(rpriv
->netdev
);
5157 esw
= priv
->mdev
->priv
.eswitch
;
5159 uplink_priv
->post_act
= mlx5e_tc_post_act_init(priv
, esw_chains(esw
),
5160 MLX5_FLOW_NAMESPACE_FDB
);
5161 uplink_priv
->ct_priv
= mlx5_tc_ct_init(netdev_priv(priv
->netdev
),
5163 &esw
->offloads
.mod_hdr
,
5164 MLX5_FLOW_NAMESPACE_FDB
,
5165 uplink_priv
->post_act
);
5167 uplink_priv
->int_port_priv
= mlx5e_tc_int_port_init(netdev_priv(priv
->netdev
));
5169 uplink_priv
->tc_psample
= mlx5e_tc_sample_init(esw
, uplink_priv
->post_act
);
5171 mapping_id
= mlx5_query_nic_system_image_guid(esw
->dev
);
5173 mapping
= mapping_create_for_id(mapping_id
, MAPPING_TYPE_TUNNEL
,
5174 sizeof(struct tunnel_match_key
),
5175 TUNNEL_INFO_BITS_MASK
, true);
5177 if (IS_ERR(mapping
)) {
5178 err
= PTR_ERR(mapping
);
5179 goto err_tun_mapping
;
5181 uplink_priv
->tunnel_mapping
= mapping
;
5183 /* 0xFFF is reserved for stack devices slow path table mark */
5184 mapping
= mapping_create_for_id(mapping_id
, MAPPING_TYPE_TUNNEL_ENC_OPTS
,
5185 sz_enc_opts
, ENC_OPTS_BITS_MASK
- 1, true);
5186 if (IS_ERR(mapping
)) {
5187 err
= PTR_ERR(mapping
);
5188 goto err_enc_opts_mapping
;
5190 uplink_priv
->tunnel_enc_opts_mapping
= mapping
;
5192 err
= rhashtable_init(tc_ht
, &tc_ht_params
);
5196 lockdep_set_class(&tc_ht
->mutex
, &tc_ht_lock_key
);
5198 uplink_priv
->encap
= mlx5e_tc_tun_init(priv
);
5199 if (IS_ERR(uplink_priv
->encap
)) {
5200 err
= PTR_ERR(uplink_priv
->encap
);
5201 goto err_register_fib_notifier
;
5206 err_register_fib_notifier
:
5207 rhashtable_destroy(tc_ht
);
5209 mapping_destroy(uplink_priv
->tunnel_enc_opts_mapping
);
5210 err_enc_opts_mapping
:
5211 mapping_destroy(uplink_priv
->tunnel_mapping
);
5213 mlx5e_tc_sample_cleanup(uplink_priv
->tc_psample
);
5214 mlx5e_tc_int_port_cleanup(uplink_priv
->int_port_priv
);
5215 mlx5_tc_ct_clean(uplink_priv
->ct_priv
);
5216 netdev_warn(priv
->netdev
,
5217 "Failed to initialize tc (eswitch), err: %d", err
);
5218 mlx5e_tc_post_act_destroy(uplink_priv
->post_act
);
5222 void mlx5e_tc_esw_cleanup(struct rhashtable
*tc_ht
)
5224 struct mlx5_rep_uplink_priv
*uplink_priv
;
5226 uplink_priv
= container_of(tc_ht
, struct mlx5_rep_uplink_priv
, tc_ht
);
5228 rhashtable_free_and_destroy(tc_ht
, _mlx5e_tc_del_flow
, NULL
);
5229 mlx5e_tc_tun_cleanup(uplink_priv
->encap
);
5231 mapping_destroy(uplink_priv
->tunnel_enc_opts_mapping
);
5232 mapping_destroy(uplink_priv
->tunnel_mapping
);
5234 mlx5e_tc_sample_cleanup(uplink_priv
->tc_psample
);
5235 mlx5e_tc_int_port_cleanup(uplink_priv
->int_port_priv
);
5236 mlx5_tc_ct_clean(uplink_priv
->ct_priv
);
5237 mlx5e_tc_post_act_destroy(uplink_priv
->post_act
);
5240 int mlx5e_tc_num_filters(struct mlx5e_priv
*priv
, unsigned long flags
)
5242 struct rhashtable
*tc_ht
= get_tc_ht(priv
, flags
);
5244 return atomic_read(&tc_ht
->nelems
);
5247 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch
*esw
)
5249 struct mlx5e_tc_flow
*flow
, *tmp
;
5251 list_for_each_entry_safe(flow
, tmp
, &esw
->offloads
.peer_flows
, peer
)
5252 __mlx5e_tc_del_fdb_peer_flow(flow
);
5255 void mlx5e_tc_reoffload_flows_work(struct work_struct
*work
)
5257 struct mlx5_rep_uplink_priv
*rpriv
=
5258 container_of(work
, struct mlx5_rep_uplink_priv
,
5259 reoffload_flows_work
);
5260 struct mlx5e_tc_flow
*flow
, *tmp
;
5262 mutex_lock(&rpriv
->unready_flows_lock
);
5263 list_for_each_entry_safe(flow
, tmp
, &rpriv
->unready_flows
, unready
) {
5264 if (!mlx5e_tc_add_fdb_flow(flow
->priv
, flow
, NULL
))
5265 unready_flow_del(flow
);
5267 mutex_unlock(&rpriv
->unready_flows_lock
);
5270 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv
*priv
,
5271 struct flow_cls_offload
*cls_flower
,
5272 unsigned long flags
)
5274 switch (cls_flower
->command
) {
5275 case FLOW_CLS_REPLACE
:
5276 return mlx5e_configure_flower(priv
->netdev
, priv
, cls_flower
,
5278 case FLOW_CLS_DESTROY
:
5279 return mlx5e_delete_flower(priv
->netdev
, priv
, cls_flower
,
5281 case FLOW_CLS_STATS
:
5282 return mlx5e_stats_flower(priv
->netdev
, priv
, cls_flower
,
5289 int mlx5e_setup_tc_block_cb(enum tc_setup_type type
, void *type_data
,
5292 unsigned long flags
= MLX5_TC_FLAG(INGRESS
);
5293 struct mlx5e_priv
*priv
= cb_priv
;
5295 if (!priv
->netdev
|| !netif_device_present(priv
->netdev
))
5298 if (mlx5e_is_uplink_rep(priv
))
5299 flags
|= MLX5_TC_FLAG(ESW_OFFLOAD
);
5301 flags
|= MLX5_TC_FLAG(NIC_OFFLOAD
);
5304 case TC_SETUP_CLSFLOWER
:
5305 return mlx5e_setup_tc_cls_flower(priv
, type_data
, flags
);
5311 bool mlx5e_tc_update_skb(struct mlx5_cqe64
*cqe
,
5312 struct sk_buff
*skb
)
5314 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
5315 u32 chain
= 0, chain_tag
, reg_b
, zone_restore_id
;
5316 struct mlx5e_priv
*priv
= netdev_priv(skb
->dev
);
5317 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
5318 struct mlx5_mapped_obj mapped_obj
;
5319 struct tc_skb_ext
*tc_skb_ext
;
5322 reg_b
= be32_to_cpu(cqe
->ft_metadata
);
5324 chain_tag
= reg_b
& MLX5E_TC_TABLE_CHAIN_TAG_MASK
;
5326 err
= mapping_find(tc
->mapping
, chain_tag
, &mapped_obj
);
5328 netdev_dbg(priv
->netdev
,
5329 "Couldn't find chain for chain tag: %d, err: %d\n",
5334 if (mapped_obj
.type
== MLX5_MAPPED_OBJ_CHAIN
) {
5335 chain
= mapped_obj
.chain
;
5336 tc_skb_ext
= tc_skb_ext_alloc(skb
);
5337 if (WARN_ON(!tc_skb_ext
))
5340 tc_skb_ext
->chain
= chain
;
5342 zone_restore_id
= (reg_b
>> REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG
)) &
5345 if (!mlx5e_tc_ct_restore_flow(tc
->ct
, skb
,
5349 netdev_dbg(priv
->netdev
, "Invalid mapped object type: %d\n", mapped_obj
.type
);
5352 #endif /* CONFIG_NET_TC_SKB_EXT */