2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <net/flow_dissector.h>
34 #include <net/sch_generic.h>
35 #include <net/pkt_cls.h>
36 #include <net/tc_act/tc_gact.h>
37 #include <net/tc_act/tc_skbedit.h>
38 #include <linux/mlx5/fs.h>
39 #include <linux/mlx5/device.h>
40 #include <linux/rhashtable.h>
41 #include <linux/refcount.h>
42 #include <linux/completion.h>
43 #include <net/tc_act/tc_mirred.h>
44 #include <net/tc_act/tc_vlan.h>
45 #include <net/tc_act/tc_tunnel_key.h>
46 #include <net/tc_act/tc_pedit.h>
47 #include <net/tc_act/tc_csum.h>
49 #include <net/ipv6_stubs.h>
56 #include "en/tc_tun.h"
57 #include "lib/devcom.h"
58 #include "lib/geneve.h"
59 #include "diag/en_tc_tracepoint.h"
61 struct mlx5_nic_flow_attr
{
64 struct mlx5_modify_hdr
*modify_hdr
;
67 struct mlx5_flow_table
*hairpin_ft
;
68 struct mlx5_fc
*counter
;
71 #define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
74 MLX5E_TC_FLOW_FLAG_INGRESS
= MLX5E_TC_FLAG_INGRESS_BIT
,
75 MLX5E_TC_FLOW_FLAG_EGRESS
= MLX5E_TC_FLAG_EGRESS_BIT
,
76 MLX5E_TC_FLOW_FLAG_ESWITCH
= MLX5E_TC_FLAG_ESW_OFFLOAD_BIT
,
77 MLX5E_TC_FLOW_FLAG_NIC
= MLX5E_TC_FLAG_NIC_OFFLOAD_BIT
,
78 MLX5E_TC_FLOW_FLAG_OFFLOADED
= MLX5E_TC_FLOW_BASE
,
79 MLX5E_TC_FLOW_FLAG_HAIRPIN
= MLX5E_TC_FLOW_BASE
+ 1,
80 MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS
= MLX5E_TC_FLOW_BASE
+ 2,
81 MLX5E_TC_FLOW_FLAG_SLOW
= MLX5E_TC_FLOW_BASE
+ 3,
82 MLX5E_TC_FLOW_FLAG_DUP
= MLX5E_TC_FLOW_BASE
+ 4,
83 MLX5E_TC_FLOW_FLAG_NOT_READY
= MLX5E_TC_FLOW_BASE
+ 5,
84 MLX5E_TC_FLOW_FLAG_DELETED
= MLX5E_TC_FLOW_BASE
+ 6,
87 #define MLX5E_TC_MAX_SPLITS 1
89 /* Helper struct for accessing a struct containing list_head array.
98 * To access the containing struct from one of the list_head items:
99 * 1. Get the helper item from the list_head item using
101 * container_of(list_head item, helper struct type, list_head field)
102 * 2. Get the containing struct from the helper item and its index in the array:
103 * containing struct =
104 * container_of(helper item, containing struct type, helper field[index])
106 struct encap_flow_item
{
107 struct mlx5e_encap_entry
*e
; /* attached encap instance */
108 struct list_head list
;
112 struct mlx5e_tc_flow
{
113 struct rhash_head node
;
114 struct mlx5e_priv
*priv
;
117 struct mlx5_flow_handle
*rule
[MLX5E_TC_MAX_SPLITS
+ 1];
118 /* Flow can be associated with multiple encap IDs.
119 * The number of encaps is bounded by the number of supported
122 struct encap_flow_item encaps
[MLX5_MAX_FLOW_FWD_VPORTS
];
123 struct mlx5e_tc_flow
*peer_flow
;
124 struct mlx5e_mod_hdr_entry
*mh
; /* attached mod header instance */
125 struct list_head mod_hdr
; /* flows sharing the same mod hdr ID */
126 struct mlx5e_hairpin_entry
*hpe
; /* attached hairpin instance */
127 struct list_head hairpin
; /* flows sharing the same hairpin */
128 struct list_head peer
; /* flows with peer flow */
129 struct list_head unready
; /* flows not ready to be offloaded (e.g due to missing route) */
131 struct list_head tmp_list
; /* temporary flow list used by neigh update */
133 struct rcu_head rcu_head
;
134 struct completion init_done
;
136 struct mlx5_esw_flow_attr esw_attr
[0];
137 struct mlx5_nic_flow_attr nic_attr
[0];
141 struct mlx5e_tc_flow_parse_attr
{
142 const struct ip_tunnel_info
*tun_info
[MLX5_MAX_FLOW_FWD_VPORTS
];
143 struct net_device
*filter_dev
;
144 struct mlx5_flow_spec spec
;
145 int num_mod_hdr_actions
;
146 int max_mod_hdr_actions
;
147 void *mod_hdr_actions
;
148 int mirred_ifindex
[MLX5_MAX_FLOW_FWD_VPORTS
];
151 #define MLX5E_TC_TABLE_NUM_GROUPS 4
152 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
154 struct mlx5e_hairpin
{
155 struct mlx5_hairpin
*pair
;
157 struct mlx5_core_dev
*func_mdev
;
158 struct mlx5e_priv
*func_priv
;
163 struct mlx5e_rqt indir_rqt
;
164 u32 indir_tirn
[MLX5E_NUM_INDIR_TIRS
];
165 struct mlx5e_ttc_table ttc
;
168 struct mlx5e_hairpin_entry
{
169 /* a node of a hash table which keeps all the hairpin entries */
170 struct hlist_node hairpin_hlist
;
172 /* protects flows list */
173 spinlock_t flows_lock
;
174 /* flows sharing the same hairpin */
175 struct list_head flows
;
176 /* hpe's that were not fully initialized when dead peer update event
177 * function traversed them.
179 struct list_head dead_peer_wait_list
;
183 struct mlx5e_hairpin
*hp
;
185 struct completion res_ready
;
193 struct mlx5e_mod_hdr_entry
{
194 /* a node of a hash table which keeps all the mod_hdr entries */
195 struct hlist_node mod_hdr_hlist
;
197 /* protects flows list */
198 spinlock_t flows_lock
;
199 /* flows sharing the same mod_hdr entry */
200 struct list_head flows
;
202 struct mod_hdr_key key
;
204 struct mlx5_modify_hdr
*modify_hdr
;
207 struct completion res_ready
;
211 #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
213 static void mlx5e_tc_del_flow(struct mlx5e_priv
*priv
,
214 struct mlx5e_tc_flow
*flow
);
/* Try to take an additional reference on @flow.
 * Bails out with ERR_PTR(-EINVAL) when @flow is NULL or when
 * refcount_inc_not_zero() fails because the count has already hit zero.
 * NOTE(review): the success return is not visible in this excerpt.
 */
216 static struct mlx5e_tc_flow
*mlx5e_flow_get(struct mlx5e_tc_flow
*flow
)
218 if (!flow
|| !refcount_inc_not_zero(&flow
->refcnt
))
219 return ERR_PTR(-EINVAL
);
/* Drop one reference on @flow.
 * When refcount_dec_and_test() reports that this was the last reference,
 * the flow is torn down with mlx5e_tc_del_flow() and its memory is handed
 * to kfree_rcu() through the embedded rcu_head member.
 */
223 static void mlx5e_flow_put(struct mlx5e_priv
*priv
,
224 struct mlx5e_tc_flow
*flow
)
226 if (refcount_dec_and_test(&flow
->refcnt
)) {
227 mlx5e_tc_del_flow(priv
, flow
);
228 kfree_rcu(flow
, rcu_head
);
/* Set bit @flag in flow->flags with set_bit().
 * A barrier is issued first so that earlier stores are complete before the
 * bit is set.
 */
232 static void __flow_flag_set(struct mlx5e_tc_flow
*flow
, unsigned long flag
)
234 /* Complete all memory stores before setting bit. */
235 smp_mb__before_atomic();
236 set_bit(flag
, &flow
->flags
);
/* Shorthand: pastes the MLX5E_TC_FLOW_FLAG_ prefix onto @flag. */
239 #define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
/* Set bit @flag in flow->flags and return the result of test_and_set_bit()
 * (the previous state of the bit). No explicit barrier is added here; see
 * the comment in the body.
 * NOTE(review): the second parameter line is missing from this excerpt.
 */
241 static bool __flow_flag_test_and_set(struct mlx5e_tc_flow
*flow
,
244 /* test_and_set_bit() provides all necessary barriers */
245 return test_and_set_bit(flag
, &flow
->flags
);
/* Shorthand: pastes the MLX5E_TC_FLOW_FLAG_ prefix onto @flag. */
248 #define flow_flag_test_and_set(flow, flag) \
249 __flow_flag_test_and_set(flow, \
250 MLX5E_TC_FLOW_FLAG_##flag)
/* Clear bit @flag in flow->flags with clear_bit().
 * A barrier is issued first so that earlier stores are complete before the
 * bit is cleared.
 */
252 static void __flow_flag_clear(struct mlx5e_tc_flow
*flow
, unsigned long flag
)
254 /* Complete all memory stores before clearing bit. */
255 smp_mb__before_atomic();
256 clear_bit(flag
, &flow
->flags
);
/* Shorthand: pastes the MLX5E_TC_FLOW_FLAG_ prefix onto @flag. */
259 #define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
260 MLX5E_TC_FLOW_FLAG_##flag)
/* Sample bit @flag of flow->flags with test_bit() into a local, then issue
 * a barrier so that subsequent reads of the flow happen after the flag
 * check.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the sampled value is returned.
 */
262 static bool __flow_flag_test(struct mlx5e_tc_flow
*flow
, unsigned long flag
)
264 bool ret
= test_bit(flag
, &flow
->flags
);
266 /* Read fields of flow structure only after checking flags. */
267 smp_mb__after_atomic();
/* Shorthand: pastes the MLX5E_TC_FLOW_FLAG_ prefix onto @flag. */
271 #define flow_flag_test(flow, flag) __flow_flag_test(flow, \
272 MLX5E_TC_FLOW_FLAG_##flag)
/* Report whether @flow carries the ESWITCH flag. */
274 static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow
*flow
)
276 return flow_flag_test(flow
, ESWITCH
);
/* Report whether @flow carries the OFFLOADED flag. */
279 static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow
*flow
)
281 return flow_flag_test(flow
, OFFLOADED
);
/* Hash a mod-header key: jhash() over the key's action buffer, whose length
 * is num_actions * MLX5_MH_ACT_SZ bytes, with an initial value of 0.
 */
284 static inline u32
hash_mod_hdr_info(struct mod_hdr_key
*key
)
286 return jhash(key
->actions
,
287 key
->num_actions
* MLX5_MH_ACT_SZ
, 0);
/* Compare two mod-header keys.
 * The action counts are checked first; when they match, the result is
 * memcmp() over the two action buffers (num_actions * MLX5_MH_ACT_SZ
 * bytes), so 0 means the keys are equal.
 * NOTE(review): the value returned on a count mismatch is not visible in
 * this excerpt.
 */
290 static inline int cmp_mod_hdr_info(struct mod_hdr_key
*a
,
291 struct mod_hdr_key
*b
)
293 if (a
->num_actions
!= b
->num_actions
)
296 return memcmp(a
->actions
, b
->actions
, a
->num_actions
* MLX5_MH_ACT_SZ
);
/* Pick the mod-header table for @namespace: the eswitch offloads table
 * when @namespace is MLX5_FLOW_NAMESPACE_FDB, otherwise the table kept in
 * priv->fs.tc.
 */
299 static struct mod_hdr_tbl
*
300 get_mod_hdr_table(struct mlx5e_priv
*priv
, int namespace)
302 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
304 return namespace == MLX5_FLOW_NAMESPACE_FDB
? &esw
->offloads
.mod_hdr
:
305 &priv
->fs
.tc
.mod_hdr
;
/* Look up a mod-header entry in @tbl.
 * Walks the hash bucket selected by @hash_key and, for an entry whose key
 * compares equal to @key (cmp_mod_hdr_info() == 0), takes a reference on
 * it with refcount_inc().
 * NOTE(review): the rest of the loop body and the return statement are not
 * visible in this excerpt; presumably the matching entry (or NULL) is
 * returned via `found`.
 */
308 static struct mlx5e_mod_hdr_entry
*
309 mlx5e_mod_hdr_get(struct mod_hdr_tbl
*tbl
, struct mod_hdr_key
*key
, u32 hash_key
)
311 struct mlx5e_mod_hdr_entry
*mh
, *found
= NULL
;
313 hash_for_each_possible(tbl
->hlist
, mh
, mod_hdr_hlist
, hash_key
) {
314 if (!cmp_mod_hdr_info(&mh
->key
, key
)) {
315 refcount_inc(&mh
->refcnt
);
/* Drop a reference on mod-header entry @mh.
 * refcount_dec_and_mutex_lock() takes tbl->lock only when the count drops
 * to zero; in that case the entry is unhashed under the lock, the lock is
 * released, a warning fires if flows are still linked to the entry, and
 * the modify header is deallocated when compl_result is positive.
 * NOTE(review): the namespace parameter line, the early-return statement
 * and any freeing of @mh itself are not visible in this excerpt.
 */
324 static void mlx5e_mod_hdr_put(struct mlx5e_priv
*priv
,
325 struct mlx5e_mod_hdr_entry
*mh
,
328 struct mod_hdr_tbl
*tbl
= get_mod_hdr_table(priv
, namespace);
330 if (!refcount_dec_and_mutex_lock(&mh
->refcnt
, &tbl
->lock
))
332 hash_del(&mh
->mod_hdr_hlist
);
333 mutex_unlock(&tbl
->lock
);
335 WARN_ON(!list_empty(&mh
->flows
));
336 if (mh
->compl_result
> 0)
337 mlx5_modify_header_dealloc(priv
->mdev
, mh
->modify_hdr
);
/* Map @flow to its flow namespace: MLX5_FLOW_NAMESPACE_FDB for eswitch
 * flows, MLX5_FLOW_NAMESPACE_KERNEL otherwise.
 */
342 static int get_flow_name_space(struct mlx5e_tc_flow
*flow
)
344 return mlx5e_is_eswitch_flow(flow
) ?
345 MLX5_FLOW_NAMESPACE_FDB
: MLX5_FLOW_NAMESPACE_KERNEL
;
347 static int mlx5e_attach_mod_hdr(struct mlx5e_priv
*priv
,
348 struct mlx5e_tc_flow
*flow
,
349 struct mlx5e_tc_flow_parse_attr
*parse_attr
)
351 int num_actions
, actions_size
, namespace, err
;
352 struct mlx5e_mod_hdr_entry
*mh
;
353 struct mod_hdr_tbl
*tbl
;
354 struct mod_hdr_key key
;
357 num_actions
= parse_attr
->num_mod_hdr_actions
;
358 actions_size
= MLX5_MH_ACT_SZ
* num_actions
;
360 key
.actions
= parse_attr
->mod_hdr_actions
;
361 key
.num_actions
= num_actions
;
363 hash_key
= hash_mod_hdr_info(&key
);
365 namespace = get_flow_name_space(flow
);
366 tbl
= get_mod_hdr_table(priv
, namespace);
368 mutex_lock(&tbl
->lock
);
369 mh
= mlx5e_mod_hdr_get(tbl
, &key
, hash_key
);
371 mutex_unlock(&tbl
->lock
);
372 wait_for_completion(&mh
->res_ready
);
374 if (mh
->compl_result
< 0) {
376 goto attach_header_err
;
381 mh
= kzalloc(sizeof(*mh
) + actions_size
, GFP_KERNEL
);
383 mutex_unlock(&tbl
->lock
);
387 mh
->key
.actions
= (void *)mh
+ sizeof(*mh
);
388 memcpy(mh
->key
.actions
, key
.actions
, actions_size
);
389 mh
->key
.num_actions
= num_actions
;
390 spin_lock_init(&mh
->flows_lock
);
391 INIT_LIST_HEAD(&mh
->flows
);
392 refcount_set(&mh
->refcnt
, 1);
393 init_completion(&mh
->res_ready
);
395 hash_add(tbl
->hlist
, &mh
->mod_hdr_hlist
, hash_key
);
396 mutex_unlock(&tbl
->lock
);
398 mh
->modify_hdr
= mlx5_modify_header_alloc(priv
->mdev
, namespace,
401 if (IS_ERR(mh
->modify_hdr
)) {
402 err
= PTR_ERR(mh
->modify_hdr
);
403 mh
->compl_result
= err
;
404 goto alloc_header_err
;
406 mh
->compl_result
= 1;
407 complete_all(&mh
->res_ready
);
411 spin_lock(&mh
->flows_lock
);
412 list_add(&flow
->mod_hdr
, &mh
->flows
);
413 spin_unlock(&mh
->flows_lock
);
414 if (mlx5e_is_eswitch_flow(flow
))
415 flow
->esw_attr
->modify_hdr
= mh
->modify_hdr
;
417 flow
->nic_attr
->modify_hdr
= mh
->modify_hdr
;
422 complete_all(&mh
->res_ready
);
424 mlx5e_mod_hdr_put(priv
, mh
, namespace);
/* Detach @flow from its mod-header entry.
 * The flow is unlinked from the entry's flows list while holding the
 * entry's flows_lock, then the entry reference is dropped for the
 * namespace the flow belongs to.
 * NOTE(review): the guard that goes with the "wasn't fully initialized"
 * comment is not visible in this excerpt.
 */
428 static void mlx5e_detach_mod_hdr(struct mlx5e_priv
*priv
,
429 struct mlx5e_tc_flow
*flow
)
431 /* flow wasn't fully initialized */
435 spin_lock(&flow
->mh
->flows_lock
);
436 list_del(&flow
->mod_hdr
);
437 spin_unlock(&flow
->mh
->flows_lock
);
439 mlx5e_mod_hdr_put(priv
, flow
->mh
, get_flow_name_space(flow
));
/* Resolve the net device with index @ifindex in namespace @net via
 * __dev_get_by_index() and fetch its private data with netdev_priv().
 * NOTE(review): no NULL check on the looked-up netdev and no return
 * statement are visible in this excerpt; presumably the core device of
 * that priv is returned - confirm against the full source.
 */
444 struct mlx5_core_dev
*mlx5e_hairpin_get_mdev(struct net
*net
, int ifindex
)
446 struct net_device
*netdev
;
447 struct mlx5e_priv
*priv
;
449 netdev
= __dev_get_by_index(net
, ifindex
);
450 priv
= netdev_priv(netdev
);
454 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin
*hp
)
456 u32 in
[MLX5_ST_SZ_DW(create_tir_in
)] = {0};
460 err
= mlx5_core_alloc_transport_domain(hp
->func_mdev
, &hp
->tdn
);
464 tirc
= MLX5_ADDR_OF(create_tir_in
, in
, ctx
);
466 MLX5_SET(tirc
, tirc
, disp_type
, MLX5_TIRC_DISP_TYPE_DIRECT
);
467 MLX5_SET(tirc
, tirc
, inline_rqn
, hp
->pair
->rqn
[0]);
468 MLX5_SET(tirc
, tirc
, transport_domain
, hp
->tdn
);
470 err
= mlx5_core_create_tir(hp
->func_mdev
, in
, MLX5_ST_SZ_BYTES(create_tir_in
), &hp
->tirn
);
477 mlx5_core_dealloc_transport_domain(hp
->func_mdev
, hp
->tdn
);
/* Release the hairpin transport objects on hp->func_mdev: first the TIR
 * (hp->tirn), then the transport domain (hp->tdn).
 */
482 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin
*hp
)
484 mlx5_core_destroy_tir(hp
->func_mdev
, hp
->tirn
);
485 mlx5_core_dealloc_transport_domain(hp
->func_mdev
, hp
->tdn
);
488 static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin
*hp
, void *rqtc
)
490 u32 indirection_rqt
[MLX5E_INDIR_RQT_SIZE
], rqn
;
491 struct mlx5e_priv
*priv
= hp
->func_priv
;
492 int i
, ix
, sz
= MLX5E_INDIR_RQT_SIZE
;
494 mlx5e_build_default_indir_rqt(indirection_rqt
, sz
,
497 for (i
= 0; i
< sz
; i
++) {
499 if (priv
->rss_params
.hfunc
== ETH_RSS_HASH_XOR
)
500 ix
= mlx5e_bits_invert(i
, ilog2(sz
));
501 ix
= indirection_rqt
[ix
];
502 rqn
= hp
->pair
->rqn
[ix
];
503 MLX5_SET(rqtc
, rqtc
, rq_num
[i
], rqn
);
507 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin
*hp
)
509 int inlen
, err
, sz
= MLX5E_INDIR_RQT_SIZE
;
510 struct mlx5e_priv
*priv
= hp
->func_priv
;
511 struct mlx5_core_dev
*mdev
= priv
->mdev
;
515 inlen
= MLX5_ST_SZ_BYTES(create_rqt_in
) + sizeof(u32
) * sz
;
516 in
= kvzalloc(inlen
, GFP_KERNEL
);
520 rqtc
= MLX5_ADDR_OF(create_rqt_in
, in
, rqt_context
);
522 MLX5_SET(rqtc
, rqtc
, rqt_actual_size
, sz
);
523 MLX5_SET(rqtc
, rqtc
, rqt_max_size
, sz
);
525 mlx5e_hairpin_fill_rqt_rqns(hp
, rqtc
);
527 err
= mlx5_core_create_rqt(mdev
, in
, inlen
, &hp
->indir_rqt
.rqtn
);
529 hp
->indir_rqt
.enabled
= true;
535 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin
*hp
)
537 struct mlx5e_priv
*priv
= hp
->func_priv
;
538 u32 in
[MLX5_ST_SZ_DW(create_tir_in
)];
542 for (tt
= 0; tt
< MLX5E_NUM_INDIR_TIRS
; tt
++) {
543 struct mlx5e_tirc_config ttconfig
= mlx5e_tirc_get_default_config(tt
);
545 memset(in
, 0, MLX5_ST_SZ_BYTES(create_tir_in
));
546 tirc
= MLX5_ADDR_OF(create_tir_in
, in
, ctx
);
548 MLX5_SET(tirc
, tirc
, transport_domain
, hp
->tdn
);
549 MLX5_SET(tirc
, tirc
, disp_type
, MLX5_TIRC_DISP_TYPE_INDIRECT
);
550 MLX5_SET(tirc
, tirc
, indirect_table
, hp
->indir_rqt
.rqtn
);
551 mlx5e_build_indir_tir_ctx_hash(&priv
->rss_params
, &ttconfig
, tirc
, false);
553 err
= mlx5_core_create_tir(hp
->func_mdev
, in
,
554 MLX5_ST_SZ_BYTES(create_tir_in
), &hp
->indir_tirn
[tt
]);
556 mlx5_core_warn(hp
->func_mdev
, "create indirect tirs failed, %d\n", err
);
557 goto err_destroy_tirs
;
563 for (i
= 0; i
< tt
; i
++)
564 mlx5_core_destroy_tir(hp
->func_mdev
, hp
->indir_tirn
[i
]);
/* Destroy every indirect TIR of @hp: one mlx5_core_destroy_tir() call per
 * slot of hp->indir_tirn[], for all MLX5E_NUM_INDIR_TIRS slots.
 */
568 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin
*hp
)
572 for (tt
= 0; tt
< MLX5E_NUM_INDIR_TIRS
; tt
++)
573 mlx5_core_destroy_tir(hp
->func_mdev
, hp
->indir_tirn
[tt
]);
/* Fill @ttc_params for the hairpin TTC table.
 * The structure is zeroed, any_tt_tirn is set to the hairpin's direct TIR
 * (hp->tirn), the indirect TIR numbers are copied slot by slot, and the
 * flow-table attributes get max_fte = MLX5E_NUM_TT,
 * level = MLX5E_TC_TTC_FT_LEVEL and prio = MLX5E_TC_PRIO.
 */
576 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin
*hp
,
577 struct ttc_params
*ttc_params
)
579 struct mlx5_flow_table_attr
*ft_attr
= &ttc_params
->ft_attr
;
582 memset(ttc_params
, 0, sizeof(*ttc_params
));
584 ttc_params
->any_tt_tirn
= hp
->tirn
;
586 for (tt
= 0; tt
< MLX5E_NUM_INDIR_TIRS
; tt
++)
587 ttc_params
->indir_tirn
[tt
] = hp
->indir_tirn
[tt
];
589 ft_attr
->max_fte
= MLX5E_NUM_TT
;
590 ft_attr
->level
= MLX5E_TC_TTC_FT_LEVEL
;
591 ft_attr
->prio
= MLX5E_TC_PRIO
;
/* Set up the RSS resources of a hairpin, in order: the indirect RQT, the
 * indirect TIRs, then the TTC table built from the parameters produced by
 * mlx5e_hairpin_set_ttc_params().
 * The error labels unwind in reverse: a TTC-table failure destroys the
 * indirect TIRs and then falls through to destroying the RQT.
 * NOTE(review): the error checks after each step and the return
 * statements are not visible in this excerpt.
 */
594 static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin
*hp
)
596 struct mlx5e_priv
*priv
= hp
->func_priv
;
597 struct ttc_params ttc_params
;
600 err
= mlx5e_hairpin_create_indirect_rqt(hp
);
604 err
= mlx5e_hairpin_create_indirect_tirs(hp
);
606 goto err_create_indirect_tirs
;
608 mlx5e_hairpin_set_ttc_params(hp
, &ttc_params
);
609 err
= mlx5e_create_ttc_table(priv
, &ttc_params
, &hp
->ttc
);
611 goto err_create_ttc_table
;
613 netdev_dbg(priv
->netdev
, "add hairpin: using %d channels rss ttc table id %x\n",
614 hp
->num_channels
, hp
->ttc
.ft
.t
->id
);
618 err_create_ttc_table
:
619 mlx5e_hairpin_destroy_indirect_tirs(hp
);
620 err_create_indirect_tirs
:
621 mlx5e_destroy_rqt(priv
, &hp
->indir_rqt
);
/* Undo mlx5e_hairpin_rss_init(): destroy the TTC table, then the indirect
 * TIRs, then the indirect RQT.
 */
626 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin
*hp
)
628 struct mlx5e_priv
*priv
= hp
->func_priv
;
630 mlx5e_destroy_ttc_table(priv
, &hp
->ttc
);
631 mlx5e_hairpin_destroy_indirect_tirs(hp
);
632 mlx5e_destroy_rqt(priv
, &hp
->indir_rqt
);
635 static struct mlx5e_hairpin
*
636 mlx5e_hairpin_create(struct mlx5e_priv
*priv
, struct mlx5_hairpin_params
*params
,
639 struct mlx5_core_dev
*func_mdev
, *peer_mdev
;
640 struct mlx5e_hairpin
*hp
;
641 struct mlx5_hairpin
*pair
;
644 hp
= kzalloc(sizeof(*hp
), GFP_KERNEL
);
646 return ERR_PTR(-ENOMEM
);
648 func_mdev
= priv
->mdev
;
649 peer_mdev
= mlx5e_hairpin_get_mdev(dev_net(priv
->netdev
), peer_ifindex
);
651 pair
= mlx5_core_hairpin_create(func_mdev
, peer_mdev
, params
);
654 goto create_pair_err
;
657 hp
->func_mdev
= func_mdev
;
658 hp
->func_priv
= priv
;
659 hp
->num_channels
= params
->num_channels
;
661 err
= mlx5e_hairpin_create_transport(hp
);
663 goto create_transport_err
;
665 if (hp
->num_channels
> 1) {
666 err
= mlx5e_hairpin_rss_init(hp
);
674 mlx5e_hairpin_destroy_transport(hp
);
675 create_transport_err
:
676 mlx5_core_hairpin_destroy(hp
->pair
);
/* Tear down a hairpin: RSS resources are cleaned up only when more than
 * one channel is in use, then the transport objects and the core hairpin
 * pair are destroyed.
 * NOTE(review): any freeing of @hp itself is not visible in this excerpt.
 */
682 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin
*hp
)
684 if (hp
->num_channels
> 1)
685 mlx5e_hairpin_rss_cleanup(hp
);
686 mlx5e_hairpin_destroy_transport(hp
);
687 mlx5_core_hairpin_destroy(hp
->pair
);
/* Build the hairpin hash key: @peer_vhca_id shifted left by 16 bits,
 * OR'ed with @prio.
 */
691 static inline u32
hash_hairpin_info(u16 peer_vhca_id
, u8 prio
)
693 return (peer_vhca_id
<< 16 | prio
);
/* Look up a hairpin entry in priv->fs.tc.hairpin_tbl.
 * Walks the bucket for hash_hairpin_info(@peer_vhca_id, @prio) and, for an
 * entry whose peer_vhca_id and prio both match, takes a reference on it
 * with refcount_inc().
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the matching entry (or NULL) is returned.
 */
696 static struct mlx5e_hairpin_entry
*mlx5e_hairpin_get(struct mlx5e_priv
*priv
,
697 u16 peer_vhca_id
, u8 prio
)
699 struct mlx5e_hairpin_entry
*hpe
;
700 u32 hash_key
= hash_hairpin_info(peer_vhca_id
, prio
);
702 hash_for_each_possible(priv
->fs
.tc
.hairpin_tbl
, hpe
,
703 hairpin_hlist
, hash_key
) {
704 if (hpe
->peer_vhca_id
== peer_vhca_id
&& hpe
->prio
== prio
) {
705 refcount_inc(&hpe
->refcnt
);
/* Drop a reference on hairpin entry @hpe.
 * refcount_dec_and_mutex_lock() takes hairpin_tbl_lock only when the count
 * drops to zero; the entry is then unhashed under the lock and the lock is
 * released. If hpe->hp is a valid pointer (neither NULL nor an error
 * pointer) the hairpin is destroyed, and a warning fires if flows are
 * still linked to the entry.
 * NOTE(review): the early-return statement and any freeing of @hpe itself
 * are not visible in this excerpt.
 */
713 static void mlx5e_hairpin_put(struct mlx5e_priv
*priv
,
714 struct mlx5e_hairpin_entry
*hpe
)
716 /* no more hairpin flows for us, release the hairpin pair */
717 if (!refcount_dec_and_mutex_lock(&hpe
->refcnt
, &priv
->fs
.tc
.hairpin_tbl_lock
))
719 hash_del(&hpe
->hairpin_hlist
);
720 mutex_unlock(&priv
->fs
.tc
.hairpin_tbl_lock
);
722 if (!IS_ERR_OR_NULL(hpe
->hp
)) {
723 netdev_dbg(priv
->netdev
, "del hairpin: peer %s\n",
724 dev_name(hpe
->hp
->pair
->peer_mdev
->device
));
726 mlx5e_hairpin_destroy(hpe
->hp
);
729 WARN_ON(!list_empty(&hpe
->flows
));
733 #define UNKNOWN_MATCH_PRIO 8
735 static int mlx5e_hairpin_get_prio(struct mlx5e_priv
*priv
,
736 struct mlx5_flow_spec
*spec
, u8
*match_prio
,
737 struct netlink_ext_ack
*extack
)
739 void *headers_c
, *headers_v
;
740 u8 prio_val
, prio_mask
= 0;
743 #ifdef CONFIG_MLX5_CORE_EN_DCB
744 if (priv
->dcbx_dp
.trust_state
!= MLX5_QPTS_TRUST_PCP
) {
745 NL_SET_ERR_MSG_MOD(extack
,
746 "only PCP trust state supported for hairpin");
750 headers_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
, outer_headers
);
751 headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
, outer_headers
);
753 vlan_present
= MLX5_GET(fte_match_set_lyr_2_4
, headers_v
, cvlan_tag
);
755 prio_mask
= MLX5_GET(fte_match_set_lyr_2_4
, headers_c
, first_prio
);
756 prio_val
= MLX5_GET(fte_match_set_lyr_2_4
, headers_v
, first_prio
);
759 if (!vlan_present
|| !prio_mask
) {
760 prio_val
= UNKNOWN_MATCH_PRIO
;
761 } else if (prio_mask
!= 0x7) {
762 NL_SET_ERR_MSG_MOD(extack
,
763 "masked priority match not supported for hairpin");
767 *match_prio
= prio_val
;
771 static int mlx5e_hairpin_flow_add(struct mlx5e_priv
*priv
,
772 struct mlx5e_tc_flow
*flow
,
773 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
774 struct netlink_ext_ack
*extack
)
776 int peer_ifindex
= parse_attr
->mirred_ifindex
[0];
777 struct mlx5_hairpin_params params
;
778 struct mlx5_core_dev
*peer_mdev
;
779 struct mlx5e_hairpin_entry
*hpe
;
780 struct mlx5e_hairpin
*hp
;
787 peer_mdev
= mlx5e_hairpin_get_mdev(dev_net(priv
->netdev
), peer_ifindex
);
788 if (!MLX5_CAP_GEN(priv
->mdev
, hairpin
) || !MLX5_CAP_GEN(peer_mdev
, hairpin
)) {
789 NL_SET_ERR_MSG_MOD(extack
, "hairpin is not supported");
793 peer_id
= MLX5_CAP_GEN(peer_mdev
, vhca_id
);
794 err
= mlx5e_hairpin_get_prio(priv
, &parse_attr
->spec
, &match_prio
,
799 mutex_lock(&priv
->fs
.tc
.hairpin_tbl_lock
);
800 hpe
= mlx5e_hairpin_get(priv
, peer_id
, match_prio
);
802 mutex_unlock(&priv
->fs
.tc
.hairpin_tbl_lock
);
803 wait_for_completion(&hpe
->res_ready
);
805 if (IS_ERR(hpe
->hp
)) {
812 hpe
= kzalloc(sizeof(*hpe
), GFP_KERNEL
);
814 mutex_unlock(&priv
->fs
.tc
.hairpin_tbl_lock
);
818 spin_lock_init(&hpe
->flows_lock
);
819 INIT_LIST_HEAD(&hpe
->flows
);
820 INIT_LIST_HEAD(&hpe
->dead_peer_wait_list
);
821 hpe
->peer_vhca_id
= peer_id
;
822 hpe
->prio
= match_prio
;
823 refcount_set(&hpe
->refcnt
, 1);
824 init_completion(&hpe
->res_ready
);
826 hash_add(priv
->fs
.tc
.hairpin_tbl
, &hpe
->hairpin_hlist
,
827 hash_hairpin_info(peer_id
, match_prio
));
828 mutex_unlock(&priv
->fs
.tc
.hairpin_tbl_lock
);
830 params
.log_data_size
= 15;
831 params
.log_data_size
= min_t(u8
, params
.log_data_size
,
832 MLX5_CAP_GEN(priv
->mdev
, log_max_hairpin_wq_data_sz
));
833 params
.log_data_size
= max_t(u8
, params
.log_data_size
,
834 MLX5_CAP_GEN(priv
->mdev
, log_min_hairpin_wq_data_sz
));
836 params
.log_num_packets
= params
.log_data_size
-
837 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv
->mdev
);
838 params
.log_num_packets
= min_t(u8
, params
.log_num_packets
,
839 MLX5_CAP_GEN(priv
->mdev
, log_max_hairpin_num_packets
));
841 params
.q_counter
= priv
->q_counter
;
842 /* set hairpin pair per each 50Gbs share of the link */
843 mlx5e_port_max_linkspeed(priv
->mdev
, &link_speed
);
844 link_speed
= max_t(u32
, link_speed
, 50000);
845 link_speed64
= link_speed
;
846 do_div(link_speed64
, 50000);
847 params
.num_channels
= link_speed64
;
849 hp
= mlx5e_hairpin_create(priv
, ¶ms
, peer_ifindex
);
851 complete_all(&hpe
->res_ready
);
857 netdev_dbg(priv
->netdev
, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
858 hp
->tirn
, hp
->pair
->rqn
[0],
859 dev_name(hp
->pair
->peer_mdev
->device
),
860 hp
->pair
->sqn
[0], match_prio
, params
.log_data_size
, params
.log_num_packets
);
863 if (hpe
->hp
->num_channels
> 1) {
864 flow_flag_set(flow
, HAIRPIN_RSS
);
865 flow
->nic_attr
->hairpin_ft
= hpe
->hp
->ttc
.ft
.t
;
867 flow
->nic_attr
->hairpin_tirn
= hpe
->hp
->tirn
;
871 spin_lock(&hpe
->flows_lock
);
872 list_add(&flow
->hairpin
, &hpe
->flows
);
873 spin_unlock(&hpe
->flows_lock
);
878 mlx5e_hairpin_put(priv
, hpe
);
/* Detach @flow from its hairpin entry.
 * The flow is unlinked from the entry's flows list while holding the
 * entry's flows_lock, then the entry reference is dropped with
 * mlx5e_hairpin_put().
 * NOTE(review): the guard that goes with the "wasn't fully initialized"
 * comment is not visible in this excerpt.
 */
882 static void mlx5e_hairpin_flow_del(struct mlx5e_priv
*priv
,
883 struct mlx5e_tc_flow
*flow
)
885 /* flow wasn't fully initialized */
889 spin_lock(&flow
->hpe
->flows_lock
);
890 list_del(&flow
->hairpin
);
891 spin_unlock(&flow
->hpe
->flows_lock
);
893 mlx5e_hairpin_put(priv
, flow
->hpe
);
898 mlx5e_tc_add_nic_flow(struct mlx5e_priv
*priv
,
899 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
900 struct mlx5e_tc_flow
*flow
,
901 struct netlink_ext_ack
*extack
)
903 struct mlx5_flow_context
*flow_context
= &parse_attr
->spec
.flow_context
;
904 struct mlx5_nic_flow_attr
*attr
= flow
->nic_attr
;
905 struct mlx5_core_dev
*dev
= priv
->mdev
;
906 struct mlx5_flow_destination dest
[2] = {};
907 struct mlx5_flow_act flow_act
= {
908 .action
= attr
->action
,
909 .flags
= FLOW_ACT_NO_APPEND
,
911 struct mlx5_fc
*counter
= NULL
;
912 int err
, dest_ix
= 0;
914 flow_context
->flags
|= FLOW_CONTEXT_HAS_TAG
;
915 flow_context
->flow_tag
= attr
->flow_tag
;
917 if (flow_flag_test(flow
, HAIRPIN
)) {
918 err
= mlx5e_hairpin_flow_add(priv
, flow
, parse_attr
, extack
);
922 if (flow_flag_test(flow
, HAIRPIN_RSS
)) {
923 dest
[dest_ix
].type
= MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE
;
924 dest
[dest_ix
].ft
= attr
->hairpin_ft
;
926 dest
[dest_ix
].type
= MLX5_FLOW_DESTINATION_TYPE_TIR
;
927 dest
[dest_ix
].tir_num
= attr
->hairpin_tirn
;
930 } else if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
) {
931 dest
[dest_ix
].type
= MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE
;
932 dest
[dest_ix
].ft
= priv
->fs
.vlan
.ft
.t
;
936 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_COUNT
) {
937 counter
= mlx5_fc_create(dev
, true);
939 return PTR_ERR(counter
);
941 dest
[dest_ix
].type
= MLX5_FLOW_DESTINATION_TYPE_COUNTER
;
942 dest
[dest_ix
].counter_id
= mlx5_fc_id(counter
);
944 attr
->counter
= counter
;
947 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
) {
948 err
= mlx5e_attach_mod_hdr(priv
, flow
, parse_attr
);
949 flow_act
.modify_hdr
= attr
->modify_hdr
;
950 kfree(parse_attr
->mod_hdr_actions
);
955 mutex_lock(&priv
->fs
.tc
.t_lock
);
956 if (IS_ERR_OR_NULL(priv
->fs
.tc
.t
)) {
957 int tc_grp_size
, tc_tbl_size
;
958 u32 max_flow_counter
;
960 max_flow_counter
= (MLX5_CAP_GEN(dev
, max_flow_counter_31_16
) << 16) |
961 MLX5_CAP_GEN(dev
, max_flow_counter_15_0
);
963 tc_grp_size
= min_t(int, max_flow_counter
, MLX5E_TC_TABLE_MAX_GROUP_SIZE
);
965 tc_tbl_size
= min_t(int, tc_grp_size
* MLX5E_TC_TABLE_NUM_GROUPS
,
966 BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev
, log_max_ft_size
)));
969 mlx5_create_auto_grouped_flow_table(priv
->fs
.ns
,
972 MLX5E_TC_TABLE_NUM_GROUPS
,
973 MLX5E_TC_FT_LEVEL
, 0);
974 if (IS_ERR(priv
->fs
.tc
.t
)) {
975 mutex_unlock(&priv
->fs
.tc
.t_lock
);
976 NL_SET_ERR_MSG_MOD(extack
,
977 "Failed to create tc offload table\n");
978 netdev_err(priv
->netdev
,
979 "Failed to create tc offload table\n");
980 return PTR_ERR(priv
->fs
.tc
.t
);
984 if (attr
->match_level
!= MLX5_MATCH_NONE
)
985 parse_attr
->spec
.match_criteria_enable
|= MLX5_MATCH_OUTER_HEADERS
;
987 flow
->rule
[0] = mlx5_add_flow_rules(priv
->fs
.tc
.t
, &parse_attr
->spec
,
988 &flow_act
, dest
, dest_ix
);
989 mutex_unlock(&priv
->fs
.tc
.t_lock
);
991 return PTR_ERR_OR_ZERO(flow
->rule
[0]);
/* Tear down a NIC-mode flow.
 * Steps visible here, in order:
 *  - delete flow->rule[0] unless it is NULL or an error pointer;
 *  - destroy the flow counter taken from the NIC attributes;
 *  - under t_lock, destroy the TC flow table and reset the pointer to NULL
 *    once no NIC-offloaded filters remain and a table exists;
 *  - detach the mod header when the action mask has MOD_HDR set;
 *  - release the hairpin when the flow carries the HAIRPIN flag.
 */
994 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv
*priv
,
995 struct mlx5e_tc_flow
*flow
)
997 struct mlx5_nic_flow_attr
*attr
= flow
->nic_attr
;
998 struct mlx5_fc
*counter
= NULL
;
1000 counter
= attr
->counter
;
1001 if (!IS_ERR_OR_NULL(flow
->rule
[0]))
1002 mlx5_del_flow_rules(flow
->rule
[0]);
1003 mlx5_fc_destroy(priv
->mdev
, counter
);
1005 mutex_lock(&priv
->fs
.tc
.t_lock
);
1006 if (!mlx5e_tc_num_filters(priv
, MLX5_TC_FLAG(NIC_OFFLOAD
)) && priv
->fs
.tc
.t
) {
1007 mlx5_destroy_flow_table(priv
->fs
.tc
.t
);
1008 priv
->fs
.tc
.t
= NULL
;
1010 mutex_unlock(&priv
->fs
.tc
.t_lock
);
1012 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)
1013 mlx5e_detach_mod_hdr(priv
, flow
);
1015 if (flow_flag_test(flow
, HAIRPIN
))
1016 mlx5e_hairpin_flow_del(priv
, flow
);
1019 static void mlx5e_detach_encap(struct mlx5e_priv
*priv
,
1020 struct mlx5e_tc_flow
*flow
, int out_index
);
1022 static int mlx5e_attach_encap(struct mlx5e_priv
*priv
,
1023 struct mlx5e_tc_flow
*flow
,
1024 struct net_device
*mirred_dev
,
1026 struct netlink_ext_ack
*extack
,
1027 struct net_device
**encap_dev
,
/* Install the FDB rule(s) for @flow.
 * The offloaded rule is added first. When attr->split_count is non-zero a
 * forwarding rule is also added and stored in flow->rule[1]; if that
 * second step fails, the first rule is removed again and the error pointer
 * held in flow->rule[1] is returned.
 * NOTE(review): the error check on the first rule and the final return are
 * not visible in this excerpt.
 */
1030 static struct mlx5_flow_handle
*
1031 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch
*esw
,
1032 struct mlx5e_tc_flow
*flow
,
1033 struct mlx5_flow_spec
*spec
,
1034 struct mlx5_esw_flow_attr
*attr
)
1036 struct mlx5_flow_handle
*rule
;
1038 rule
= mlx5_eswitch_add_offloaded_rule(esw
, spec
, attr
);
1042 if (attr
->split_count
) {
1043 flow
->rule
[1] = mlx5_eswitch_add_fwd_rule(esw
, spec
, attr
);
1044 if (IS_ERR(flow
->rule
[1])) {
1045 mlx5_eswitch_del_offloaded_rule(esw
, rule
, attr
);
1046 return flow
->rule
[1];
/* Remove the FDB rule(s) of @flow.
 * The OFFLOADED flag is cleared first; the forwarding rule in
 * flow->rule[1] is deleted only when attr->split_count is non-zero, and
 * the offloaded rule in flow->rule[0] is deleted last.
 */
1054 mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch
*esw
,
1055 struct mlx5e_tc_flow
*flow
,
1056 struct mlx5_esw_flow_attr
*attr
)
1058 flow_flag_clear(flow
, OFFLOADED
);
1060 if (attr
->split_count
)
1061 mlx5_eswitch_del_fwd_rule(esw
, flow
->rule
[1], attr
);
1063 mlx5_eswitch_del_offloaded_rule(esw
, flow
->rule
[0], attr
);
/* Offload @flow through the slow path.
 * @slow_attr is filled with a copy of the flow's eswitch attributes and
 * then overridden: action becomes FWD_DEST only, split_count becomes 0 and
 * dest_chain becomes FDB_SLOW_PATH_CHAIN. The rule is installed with these
 * attributes and the SLOW flag is set on the flow.
 * NOTE(review): the condition guarding flow_flag_set() and the return
 * statement are not visible in this excerpt.
 */
1066 static struct mlx5_flow_handle
*
1067 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch
*esw
,
1068 struct mlx5e_tc_flow
*flow
,
1069 struct mlx5_flow_spec
*spec
,
1070 struct mlx5_esw_flow_attr
*slow_attr
)
1072 struct mlx5_flow_handle
*rule
;
1074 memcpy(slow_attr
, flow
->esw_attr
, sizeof(*slow_attr
));
1075 slow_attr
->action
= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
1076 slow_attr
->split_count
= 0;
1077 slow_attr
->dest_chain
= FDB_SLOW_PATH_CHAIN
;
1079 rule
= mlx5e_tc_offload_fdb_rules(esw
, flow
, spec
, slow_attr
);
1081 flow_flag_set(flow
, SLOW
);
/* Remove a flow that was offloaded through the slow path.
 * @slow_attr is rebuilt with the same overrides used when the slow-path
 * rule was installed (FWD_DEST action, split_count 0, dest_chain
 * FDB_SLOW_PATH_CHAIN), the rules are removed with those attributes, and
 * the SLOW flag is cleared.
 */
1087 mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch
*esw
,
1088 struct mlx5e_tc_flow
*flow
,
1089 struct mlx5_esw_flow_attr
*slow_attr
)
1091 memcpy(slow_attr
, flow
->esw_attr
, sizeof(*slow_attr
));
1092 slow_attr
->action
= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
1093 slow_attr
->split_count
= 0;
1094 slow_attr
->dest_chain
= FDB_SLOW_PATH_CHAIN
;
1095 mlx5e_tc_unoffload_fdb_rules(esw
, flow
, slow_attr
);
1096 flow_flag_clear(flow
, SLOW
);
1099 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this function. */
1102 static void unready_flow_add(struct mlx5e_tc_flow
*flow
,
1103 struct list_head
*unready_flows
)
1105 flow_flag_set(flow
, NOT_READY
);
1106 list_add_tail(&flow
->unready
, unready_flows
);
1109 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this function. */
1112 static void unready_flow_del(struct mlx5e_tc_flow
*flow
)
1114 list_del(&flow
->unready
);
1115 flow_flag_clear(flow
, NOT_READY
);
1118 static void add_unready_flow(struct mlx5e_tc_flow
*flow
)
1120 struct mlx5_rep_uplink_priv
*uplink_priv
;
1121 struct mlx5e_rep_priv
*rpriv
;
1122 struct mlx5_eswitch
*esw
;
1124 esw
= flow
->priv
->mdev
->priv
.eswitch
;
1125 rpriv
= mlx5_eswitch_get_uplink_priv(esw
, REP_ETH
);
1126 uplink_priv
= &rpriv
->uplink_priv
;
1128 mutex_lock(&uplink_priv
->unready_flows_lock
);
1129 unready_flow_add(flow
, &uplink_priv
->unready_flows
);
1130 mutex_unlock(&uplink_priv
->unready_flows_lock
);
1133 static void remove_unready_flow(struct mlx5e_tc_flow
*flow
)
1135 struct mlx5_rep_uplink_priv
*uplink_priv
;
1136 struct mlx5e_rep_priv
*rpriv
;
1137 struct mlx5_eswitch
*esw
;
1139 esw
= flow
->priv
->mdev
->priv
.eswitch
;
1140 rpriv
= mlx5_eswitch_get_uplink_priv(esw
, REP_ETH
);
1141 uplink_priv
= &rpriv
->uplink_priv
;
1143 mutex_lock(&uplink_priv
->unready_flows_lock
);
1144 unready_flow_del(flow
);
1145 mutex_unlock(&uplink_priv
->unready_flows_lock
);
1149 mlx5e_tc_add_fdb_flow(struct mlx5e_priv
*priv
,
1150 struct mlx5e_tc_flow
*flow
,
1151 struct netlink_ext_ack
*extack
)
1153 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1154 u32 max_chain
= mlx5_eswitch_get_chain_range(esw
);
1155 struct mlx5_esw_flow_attr
*attr
= flow
->esw_attr
;
1156 struct mlx5e_tc_flow_parse_attr
*parse_attr
= attr
->parse_attr
;
1157 u16 max_prio
= mlx5_eswitch_get_prio_range(esw
);
1158 struct net_device
*out_dev
, *encap_dev
= NULL
;
1159 struct mlx5_fc
*counter
= NULL
;
1160 struct mlx5e_rep_priv
*rpriv
;
1161 struct mlx5e_priv
*out_priv
;
1162 bool encap_valid
= true;
1166 if (!mlx5_eswitch_prios_supported(esw
) && attr
->prio
!= 1) {
1167 NL_SET_ERR_MSG(extack
, "E-switch priorities unsupported, upgrade FW");
1171 if (attr
->chain
> max_chain
) {
1172 NL_SET_ERR_MSG(extack
, "Requested chain is out of supported range");
1176 if (attr
->prio
> max_prio
) {
1177 NL_SET_ERR_MSG(extack
, "Requested priority is out of supported range");
1181 for (out_index
= 0; out_index
< MLX5_MAX_FLOW_FWD_VPORTS
; out_index
++) {
1184 if (!(attr
->dests
[out_index
].flags
& MLX5_ESW_DEST_ENCAP
))
1187 mirred_ifindex
= parse_attr
->mirred_ifindex
[out_index
];
1188 out_dev
= __dev_get_by_index(dev_net(priv
->netdev
),
1190 err
= mlx5e_attach_encap(priv
, flow
, out_dev
, out_index
,
1191 extack
, &encap_dev
, &encap_valid
);
1195 out_priv
= netdev_priv(encap_dev
);
1196 rpriv
= out_priv
->ppriv
;
1197 attr
->dests
[out_index
].rep
= rpriv
->rep
;
1198 attr
->dests
[out_index
].mdev
= out_priv
->mdev
;
1201 err
= mlx5_eswitch_add_vlan_action(esw
, attr
);
1205 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
) {
1206 err
= mlx5e_attach_mod_hdr(priv
, flow
, parse_attr
);
1207 kfree(parse_attr
->mod_hdr_actions
);
1212 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_COUNT
) {
1213 counter
= mlx5_fc_create(attr
->counter_dev
, true);
1214 if (IS_ERR(counter
))
1215 return PTR_ERR(counter
);
1217 attr
->counter
= counter
;
1220 /* we get here if one of the following takes place:
1221 * (1) there's no error
1222 * (2) there's an encap action and we don't have valid neigh
1225 /* continue with goto slow path rule instead */
1226 struct mlx5_esw_flow_attr slow_attr
;
1228 flow
->rule
[0] = mlx5e_tc_offload_to_slow_path(esw
, flow
, &parse_attr
->spec
, &slow_attr
);
1230 flow
->rule
[0] = mlx5e_tc_offload_fdb_rules(esw
, flow
, &parse_attr
->spec
, attr
);
1233 if (IS_ERR(flow
->rule
[0]))
1234 return PTR_ERR(flow
->rule
[0]);
1236 flow_flag_set(flow
, OFFLOADED
);
1241 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow
*flow
)
1243 struct mlx5_flow_spec
*spec
= &flow
->esw_attr
->parse_attr
->spec
;
1244 void *headers_v
= MLX5_ADDR_OF(fte_match_param
,
1247 u32 geneve_tlv_opt_0_data
= MLX5_GET(fte_match_set_misc3
,
1249 geneve_tlv_option_0_data
);
1251 return !!geneve_tlv_opt_0_data
;
1254 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv
*priv
,
1255 struct mlx5e_tc_flow
*flow
)
1257 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1258 struct mlx5_esw_flow_attr
*attr
= flow
->esw_attr
;
1259 struct mlx5_esw_flow_attr slow_attr
;
1262 if (flow_flag_test(flow
, NOT_READY
)) {
1263 remove_unready_flow(flow
);
1264 kvfree(attr
->parse_attr
);
1268 if (mlx5e_is_offloaded_flow(flow
)) {
1269 if (flow_flag_test(flow
, SLOW
))
1270 mlx5e_tc_unoffload_from_slow_path(esw
, flow
, &slow_attr
);
1272 mlx5e_tc_unoffload_fdb_rules(esw
, flow
, attr
);
1275 if (mlx5_flow_has_geneve_opt(flow
))
1276 mlx5_geneve_tlv_option_del(priv
->mdev
->geneve
);
1278 mlx5_eswitch_del_vlan_action(esw
, attr
);
1280 for (out_index
= 0; out_index
< MLX5_MAX_FLOW_FWD_VPORTS
; out_index
++)
1281 if (attr
->dests
[out_index
].flags
& MLX5_ESW_DEST_ENCAP
) {
1282 mlx5e_detach_encap(priv
, flow
, out_index
);
1283 kfree(attr
->parse_attr
->tun_info
[out_index
]);
1285 kvfree(attr
->parse_attr
);
1287 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)
1288 mlx5e_detach_mod_hdr(priv
, flow
);
1290 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_COUNT
)
1291 mlx5_fc_destroy(attr
->counter_dev
, attr
->counter
);
1294 void mlx5e_tc_encap_flows_add(struct mlx5e_priv
*priv
,
1295 struct mlx5e_encap_entry
*e
,
1296 struct list_head
*flow_list
)
1298 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1299 struct mlx5_esw_flow_attr slow_attr
, *esw_attr
;
1300 struct mlx5_flow_handle
*rule
;
1301 struct mlx5_flow_spec
*spec
;
1302 struct mlx5e_tc_flow
*flow
;
1305 e
->pkt_reformat
= mlx5_packet_reformat_alloc(priv
->mdev
,
1307 e
->encap_size
, e
->encap_header
,
1308 MLX5_FLOW_NAMESPACE_FDB
);
1309 if (IS_ERR(e
->pkt_reformat
)) {
1310 mlx5_core_warn(priv
->mdev
, "Failed to offload cached encapsulation header, %lu\n",
1311 PTR_ERR(e
->pkt_reformat
));
1314 e
->flags
|= MLX5_ENCAP_ENTRY_VALID
;
1315 mlx5e_rep_queue_neigh_stats_work(priv
);
1317 list_for_each_entry(flow
, flow_list
, tmp_list
) {
1318 bool all_flow_encaps_valid
= true;
1321 if (!mlx5e_is_offloaded_flow(flow
))
1323 esw_attr
= flow
->esw_attr
;
1324 spec
= &esw_attr
->parse_attr
->spec
;
1326 esw_attr
->dests
[flow
->tmp_efi_index
].pkt_reformat
= e
->pkt_reformat
;
1327 esw_attr
->dests
[flow
->tmp_efi_index
].flags
|= MLX5_ESW_DEST_ENCAP_VALID
;
1328 /* Flow can be associated with multiple encap entries.
1329 * Before offloading the flow verify that all of them have
1330 * a valid neighbour.
1332 for (i
= 0; i
< MLX5_MAX_FLOW_FWD_VPORTS
; i
++) {
1333 if (!(esw_attr
->dests
[i
].flags
& MLX5_ESW_DEST_ENCAP
))
1335 if (!(esw_attr
->dests
[i
].flags
& MLX5_ESW_DEST_ENCAP_VALID
)) {
1336 all_flow_encaps_valid
= false;
1340 /* Do not offload flows with unresolved neighbors */
1341 if (!all_flow_encaps_valid
)
1343 /* update from slow path rule to encap rule */
1344 rule
= mlx5e_tc_offload_fdb_rules(esw
, flow
, spec
, esw_attr
);
1346 err
= PTR_ERR(rule
);
1347 mlx5_core_warn(priv
->mdev
, "Failed to update cached encapsulation flow, %d\n",
1352 mlx5e_tc_unoffload_from_slow_path(esw
, flow
, &slow_attr
);
1353 flow
->rule
[0] = rule
;
1354 /* was unset when slow path rule removed */
1355 flow_flag_set(flow
, OFFLOADED
);
1359 void mlx5e_tc_encap_flows_del(struct mlx5e_priv
*priv
,
1360 struct mlx5e_encap_entry
*e
,
1361 struct list_head
*flow_list
)
1363 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1364 struct mlx5_esw_flow_attr slow_attr
;
1365 struct mlx5_flow_handle
*rule
;
1366 struct mlx5_flow_spec
*spec
;
1367 struct mlx5e_tc_flow
*flow
;
1370 list_for_each_entry(flow
, flow_list
, tmp_list
) {
1371 if (!mlx5e_is_offloaded_flow(flow
))
1373 spec
= &flow
->esw_attr
->parse_attr
->spec
;
1375 /* update from encap rule to slow path rule */
1376 rule
= mlx5e_tc_offload_to_slow_path(esw
, flow
, spec
, &slow_attr
);
1377 /* mark the flow's encap dest as non-valid */
1378 flow
->esw_attr
->dests
[flow
->tmp_efi_index
].flags
&= ~MLX5_ESW_DEST_ENCAP_VALID
;
1381 err
= PTR_ERR(rule
);
1382 mlx5_core_warn(priv
->mdev
, "Failed to update slow path (encap) flow, %d\n",
1387 mlx5e_tc_unoffload_fdb_rules(esw
, flow
, flow
->esw_attr
);
1388 flow
->rule
[0] = rule
;
1389 /* was unset when fast path rule removed */
1390 flow_flag_set(flow
, OFFLOADED
);
1393 /* we know that the encap is valid */
1394 e
->flags
&= ~MLX5_ENCAP_ENTRY_VALID
;
1395 mlx5_packet_reformat_dealloc(priv
->mdev
, e
->pkt_reformat
);
1398 static struct mlx5_fc
*mlx5e_tc_get_counter(struct mlx5e_tc_flow
*flow
)
1400 if (mlx5e_is_eswitch_flow(flow
))
1401 return flow
->esw_attr
->counter
;
1403 return flow
->nic_attr
->counter
;
1406 /* Takes reference to all flows attached to encap and adds the flows to
1407 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
1409 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry
*e
, struct list_head
*flow_list
)
1411 struct encap_flow_item
*efi
;
1412 struct mlx5e_tc_flow
*flow
;
1414 list_for_each_entry(efi
, &e
->flows
, list
) {
1415 flow
= container_of(efi
, struct mlx5e_tc_flow
, encaps
[efi
->index
]);
1416 if (IS_ERR(mlx5e_flow_get(flow
)))
1418 wait_for_completion(&flow
->init_done
);
1420 flow
->tmp_efi_index
= efi
->index
;
1421 list_add(&flow
->tmp_list
, flow_list
);
1425 /* Iterate over tmp_list of flows attached to flow_list head. */
1426 void mlx5e_put_encap_flow_list(struct mlx5e_priv
*priv
, struct list_head
*flow_list
)
1428 struct mlx5e_tc_flow
*flow
, *tmp
;
1430 list_for_each_entry_safe(flow
, tmp
, flow_list
, tmp_list
)
1431 mlx5e_flow_put(priv
, flow
);
1434 static struct mlx5e_encap_entry
*
1435 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry
*nhe
,
1436 struct mlx5e_encap_entry
*e
)
1438 struct mlx5e_encap_entry
*next
= NULL
;
1443 /* find encap with non-zero reference counter value */
1445 list_next_or_null_rcu(&nhe
->encap_list
,
1447 struct mlx5e_encap_entry
,
1449 list_first_or_null_rcu(&nhe
->encap_list
,
1450 struct mlx5e_encap_entry
,
1453 next
= list_next_or_null_rcu(&nhe
->encap_list
,
1455 struct mlx5e_encap_entry
,
1457 if (mlx5e_encap_take(next
))
1462 /* release starting encap */
1464 mlx5e_encap_put(netdev_priv(e
->out_dev
), e
);
1468 /* wait for encap to be fully initialized */
1469 wait_for_completion(&next
->res_ready
);
1470 /* continue searching if encap entry is not in valid state after completion */
1471 if (!(next
->flags
& MLX5_ENCAP_ENTRY_VALID
)) {
1479 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry
*nhe
)
1481 struct mlx5e_neigh
*m_neigh
= &nhe
->m_neigh
;
1482 struct mlx5e_encap_entry
*e
= NULL
;
1483 struct mlx5e_tc_flow
*flow
;
1484 struct mlx5_fc
*counter
;
1485 struct neigh_table
*tbl
;
1486 bool neigh_used
= false;
1487 struct neighbour
*n
;
1490 if (m_neigh
->family
== AF_INET
)
1492 #if IS_ENABLED(CONFIG_IPV6)
1493 else if (m_neigh
->family
== AF_INET6
)
1494 tbl
= ipv6_stub
->nd_tbl
;
1499 /* mlx5e_get_next_valid_encap() releases previous encap before returning
1502 while ((e
= mlx5e_get_next_valid_encap(nhe
, e
)) != NULL
) {
1503 struct mlx5e_priv
*priv
= netdev_priv(e
->out_dev
);
1504 struct encap_flow_item
*efi
, *tmp
;
1505 struct mlx5_eswitch
*esw
;
1506 LIST_HEAD(flow_list
);
1508 esw
= priv
->mdev
->priv
.eswitch
;
1509 mutex_lock(&esw
->offloads
.encap_tbl_lock
);
1510 list_for_each_entry_safe(efi
, tmp
, &e
->flows
, list
) {
1511 flow
= container_of(efi
, struct mlx5e_tc_flow
,
1512 encaps
[efi
->index
]);
1513 if (IS_ERR(mlx5e_flow_get(flow
)))
1515 list_add(&flow
->tmp_list
, &flow_list
);
1517 if (mlx5e_is_offloaded_flow(flow
)) {
1518 counter
= mlx5e_tc_get_counter(flow
);
1519 lastuse
= mlx5_fc_query_lastuse(counter
);
1520 if (time_after((unsigned long)lastuse
, nhe
->reported_lastuse
)) {
1526 mutex_unlock(&esw
->offloads
.encap_tbl_lock
);
1528 mlx5e_put_encap_flow_list(priv
, &flow_list
);
1530 /* release current encap before breaking the loop */
1531 mlx5e_encap_put(priv
, e
);
1536 trace_mlx5e_tc_update_neigh_used_value(nhe
, neigh_used
);
1539 nhe
->reported_lastuse
= jiffies
;
1541 /* find the relevant neigh according to the cached device and
1544 n
= neigh_lookup(tbl
, &m_neigh
->dst_ip
, m_neigh
->dev
);
1548 neigh_event_send(n
, NULL
);
1553 static void mlx5e_encap_dealloc(struct mlx5e_priv
*priv
, struct mlx5e_encap_entry
*e
)
1555 WARN_ON(!list_empty(&e
->flows
));
1557 if (e
->compl_result
> 0) {
1558 mlx5e_rep_encap_entry_detach(netdev_priv(e
->out_dev
), e
);
1560 if (e
->flags
& MLX5_ENCAP_ENTRY_VALID
)
1561 mlx5_packet_reformat_dealloc(priv
->mdev
, e
->pkt_reformat
);
1565 kfree(e
->encap_header
);
1569 void mlx5e_encap_put(struct mlx5e_priv
*priv
, struct mlx5e_encap_entry
*e
)
1571 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1573 if (!refcount_dec_and_mutex_lock(&e
->refcnt
, &esw
->offloads
.encap_tbl_lock
))
1575 hash_del_rcu(&e
->encap_hlist
);
1576 mutex_unlock(&esw
->offloads
.encap_tbl_lock
);
1578 mlx5e_encap_dealloc(priv
, e
);
1581 static void mlx5e_detach_encap(struct mlx5e_priv
*priv
,
1582 struct mlx5e_tc_flow
*flow
, int out_index
)
1584 struct mlx5e_encap_entry
*e
= flow
->encaps
[out_index
].e
;
1585 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
1587 /* flow wasn't fully initialized */
1591 mutex_lock(&esw
->offloads
.encap_tbl_lock
);
1592 list_del(&flow
->encaps
[out_index
].list
);
1593 flow
->encaps
[out_index
].e
= NULL
;
1594 if (!refcount_dec_and_test(&e
->refcnt
)) {
1595 mutex_unlock(&esw
->offloads
.encap_tbl_lock
);
1598 hash_del_rcu(&e
->encap_hlist
);
1599 mutex_unlock(&esw
->offloads
.encap_tbl_lock
);
1601 mlx5e_encap_dealloc(priv
, e
);
1604 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow
*flow
)
1606 struct mlx5_eswitch
*esw
= flow
->priv
->mdev
->priv
.eswitch
;
1608 if (!flow_flag_test(flow
, ESWITCH
) ||
1609 !flow_flag_test(flow
, DUP
))
1612 mutex_lock(&esw
->offloads
.peer_mutex
);
1613 list_del(&flow
->peer
);
1614 mutex_unlock(&esw
->offloads
.peer_mutex
);
1616 flow_flag_clear(flow
, DUP
);
1618 mlx5e_tc_del_fdb_flow(flow
->peer_flow
->priv
, flow
->peer_flow
);
1619 kvfree(flow
->peer_flow
);
1620 flow
->peer_flow
= NULL
;
1623 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow
*flow
)
1625 struct mlx5_core_dev
*dev
= flow
->priv
->mdev
;
1626 struct mlx5_devcom
*devcom
= dev
->priv
.devcom
;
1627 struct mlx5_eswitch
*peer_esw
;
1629 peer_esw
= mlx5_devcom_get_peer_data(devcom
, MLX5_DEVCOM_ESW_OFFLOADS
);
1633 __mlx5e_tc_del_fdb_peer_flow(flow
);
1634 mlx5_devcom_release_peer_data(devcom
, MLX5_DEVCOM_ESW_OFFLOADS
);
1637 static void mlx5e_tc_del_flow(struct mlx5e_priv
*priv
,
1638 struct mlx5e_tc_flow
*flow
)
1640 if (mlx5e_is_eswitch_flow(flow
)) {
1641 mlx5e_tc_del_fdb_peer_flow(flow
);
1642 mlx5e_tc_del_fdb_flow(priv
, flow
);
1644 mlx5e_tc_del_nic_flow(priv
, flow
);
1649 static int parse_tunnel_attr(struct mlx5e_priv
*priv
,
1650 struct mlx5_flow_spec
*spec
,
1651 struct flow_cls_offload
*f
,
1652 struct net_device
*filter_dev
, u8
*match_level
)
1654 struct netlink_ext_ack
*extack
= f
->common
.extack
;
1655 void *headers_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
1657 void *headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
1659 struct flow_rule
*rule
= flow_cls_offload_flow_rule(f
);
1662 err
= mlx5e_tc_tun_parse(filter_dev
, priv
, spec
, f
,
1663 headers_c
, headers_v
, match_level
);
1665 NL_SET_ERR_MSG_MOD(extack
,
1666 "failed to parse tunnel attributes");
1670 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_ENC_CONTROL
)) {
1671 struct flow_match_control match
;
1674 flow_rule_match_enc_control(rule
, &match
);
1675 addr_type
= match
.key
->addr_type
;
1677 /* For tunnel addr_type used same key id`s as for non-tunnel */
1678 if (addr_type
== FLOW_DISSECTOR_KEY_IPV4_ADDRS
) {
1679 struct flow_match_ipv4_addrs match
;
1681 flow_rule_match_enc_ipv4_addrs(rule
, &match
);
1682 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
1683 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
,
1684 ntohl(match
.mask
->src
));
1685 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
1686 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
,
1687 ntohl(match
.key
->src
));
1689 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
1690 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
,
1691 ntohl(match
.mask
->dst
));
1692 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
1693 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
,
1694 ntohl(match
.key
->dst
));
1696 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4
, headers_c
,
1698 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ethertype
,
1700 } else if (addr_type
== FLOW_DISSECTOR_KEY_IPV6_ADDRS
) {
1701 struct flow_match_ipv6_addrs match
;
1703 flow_rule_match_enc_ipv6_addrs(rule
, &match
);
1704 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
1705 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
),
1706 &match
.mask
->src
, MLX5_FLD_SZ_BYTES(ipv6_layout
,
1708 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
1709 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
),
1710 &match
.key
->src
, MLX5_FLD_SZ_BYTES(ipv6_layout
,
1713 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
1714 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
),
1715 &match
.mask
->dst
, MLX5_FLD_SZ_BYTES(ipv6_layout
,
1717 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
1718 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
),
1719 &match
.key
->dst
, MLX5_FLD_SZ_BYTES(ipv6_layout
,
1722 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4
, headers_c
,
1724 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ethertype
,
1729 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_ENC_IP
)) {
1730 struct flow_match_ip match
;
1732 flow_rule_match_enc_ip(rule
, &match
);
1733 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ip_ecn
,
1734 match
.mask
->tos
& 0x3);
1735 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ip_ecn
,
1736 match
.key
->tos
& 0x3);
1738 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ip_dscp
,
1739 match
.mask
->tos
>> 2);
1740 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ip_dscp
,
1741 match
.key
->tos
>> 2);
1743 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ttl_hoplimit
,
1745 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ttl_hoplimit
,
1748 if (match
.mask
->ttl
&&
1749 !MLX5_CAP_ESW_FLOWTABLE_FDB
1751 ft_field_support
.outer_ipv4_ttl
)) {
1752 NL_SET_ERR_MSG_MOD(extack
,
1753 "Matching on TTL is not supported");
1759 /* Enforce DMAC when offloading incoming tunneled flows.
1760 * Flow counters require a match on the DMAC.
1762 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4
, headers_c
, dmac_47_16
);
1763 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4
, headers_c
, dmac_15_0
);
1764 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
1765 dmac_47_16
), priv
->netdev
->dev_addr
);
1767 /* let software handle IP fragments */
1768 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, frag
, 1);
1769 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, frag
, 0);
1774 static void *get_match_headers_criteria(u32 flags
,
1775 struct mlx5_flow_spec
*spec
)
1777 return (flags
& MLX5_FLOW_CONTEXT_ACTION_DECAP
) ?
1778 MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
1780 MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
1784 static void *get_match_headers_value(u32 flags
,
1785 struct mlx5_flow_spec
*spec
)
1787 return (flags
& MLX5_FLOW_CONTEXT_ACTION_DECAP
) ?
1788 MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
1790 MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
1794 static int __parse_cls_flower(struct mlx5e_priv
*priv
,
1795 struct mlx5_flow_spec
*spec
,
1796 struct flow_cls_offload
*f
,
1797 struct net_device
*filter_dev
,
1798 u8
*inner_match_level
, u8
*outer_match_level
)
1800 struct netlink_ext_ack
*extack
= f
->common
.extack
;
1801 void *headers_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
1803 void *headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
1805 void *misc_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
1807 void *misc_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
1809 struct flow_rule
*rule
= flow_cls_offload_flow_rule(f
);
1810 struct flow_dissector
*dissector
= rule
->match
.dissector
;
1815 match_level
= outer_match_level
;
1817 if (dissector
->used_keys
&
1818 ~(BIT(FLOW_DISSECTOR_KEY_META
) |
1819 BIT(FLOW_DISSECTOR_KEY_CONTROL
) |
1820 BIT(FLOW_DISSECTOR_KEY_BASIC
) |
1821 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS
) |
1822 BIT(FLOW_DISSECTOR_KEY_VLAN
) |
1823 BIT(FLOW_DISSECTOR_KEY_CVLAN
) |
1824 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS
) |
1825 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS
) |
1826 BIT(FLOW_DISSECTOR_KEY_PORTS
) |
1827 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID
) |
1828 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS
) |
1829 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS
) |
1830 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS
) |
1831 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL
) |
1832 BIT(FLOW_DISSECTOR_KEY_TCP
) |
1833 BIT(FLOW_DISSECTOR_KEY_IP
) |
1834 BIT(FLOW_DISSECTOR_KEY_ENC_IP
) |
1835 BIT(FLOW_DISSECTOR_KEY_ENC_OPTS
))) {
1836 NL_SET_ERR_MSG_MOD(extack
, "Unsupported key");
1837 netdev_warn(priv
->netdev
, "Unsupported key used: 0x%x\n",
1838 dissector
->used_keys
);
1842 if (mlx5e_get_tc_tun(filter_dev
)) {
1843 if (parse_tunnel_attr(priv
, spec
, f
, filter_dev
,
1847 /* At this point, header pointers should point to the inner
1848 * headers, outer header were already set by parse_tunnel_attr
1850 match_level
= inner_match_level
;
1851 headers_c
= get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP
,
1853 headers_v
= get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP
,
1857 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_BASIC
)) {
1858 struct flow_match_basic match
;
1860 flow_rule_match_basic(rule
, &match
);
1861 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ethertype
,
1862 ntohs(match
.mask
->n_proto
));
1863 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ethertype
,
1864 ntohs(match
.key
->n_proto
));
1866 if (match
.mask
->n_proto
)
1867 *match_level
= MLX5_MATCH_L2
;
1869 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_VLAN
) ||
1870 is_vlan_dev(filter_dev
)) {
1871 struct flow_dissector_key_vlan filter_dev_mask
;
1872 struct flow_dissector_key_vlan filter_dev_key
;
1873 struct flow_match_vlan match
;
1875 if (is_vlan_dev(filter_dev
)) {
1876 match
.key
= &filter_dev_key
;
1877 match
.key
->vlan_id
= vlan_dev_vlan_id(filter_dev
);
1878 match
.key
->vlan_tpid
= vlan_dev_vlan_proto(filter_dev
);
1879 match
.key
->vlan_priority
= 0;
1880 match
.mask
= &filter_dev_mask
;
1881 memset(match
.mask
, 0xff, sizeof(*match
.mask
));
1882 match
.mask
->vlan_priority
= 0;
1884 flow_rule_match_vlan(rule
, &match
);
1886 if (match
.mask
->vlan_id
||
1887 match
.mask
->vlan_priority
||
1888 match
.mask
->vlan_tpid
) {
1889 if (match
.key
->vlan_tpid
== htons(ETH_P_8021AD
)) {
1890 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
1892 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
1895 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
1897 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
1901 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, first_vid
,
1902 match
.mask
->vlan_id
);
1903 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, first_vid
,
1904 match
.key
->vlan_id
);
1906 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, first_prio
,
1907 match
.mask
->vlan_priority
);
1908 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, first_prio
,
1909 match
.key
->vlan_priority
);
1911 *match_level
= MLX5_MATCH_L2
;
1913 } else if (*match_level
!= MLX5_MATCH_NONE
) {
1914 /* cvlan_tag enabled in match criteria and
1915 * disabled in match value means both S & C tags
1916 * don't exist (untagged of both)
1918 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, cvlan_tag
, 1);
1919 *match_level
= MLX5_MATCH_L2
;
1922 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_CVLAN
)) {
1923 struct flow_match_vlan match
;
1925 flow_rule_match_cvlan(rule
, &match
);
1926 if (match
.mask
->vlan_id
||
1927 match
.mask
->vlan_priority
||
1928 match
.mask
->vlan_tpid
) {
1929 if (match
.key
->vlan_tpid
== htons(ETH_P_8021AD
)) {
1930 MLX5_SET(fte_match_set_misc
, misc_c
,
1931 outer_second_svlan_tag
, 1);
1932 MLX5_SET(fte_match_set_misc
, misc_v
,
1933 outer_second_svlan_tag
, 1);
1935 MLX5_SET(fte_match_set_misc
, misc_c
,
1936 outer_second_cvlan_tag
, 1);
1937 MLX5_SET(fte_match_set_misc
, misc_v
,
1938 outer_second_cvlan_tag
, 1);
1941 MLX5_SET(fte_match_set_misc
, misc_c
, outer_second_vid
,
1942 match
.mask
->vlan_id
);
1943 MLX5_SET(fte_match_set_misc
, misc_v
, outer_second_vid
,
1944 match
.key
->vlan_id
);
1945 MLX5_SET(fte_match_set_misc
, misc_c
, outer_second_prio
,
1946 match
.mask
->vlan_priority
);
1947 MLX5_SET(fte_match_set_misc
, misc_v
, outer_second_prio
,
1948 match
.key
->vlan_priority
);
1950 *match_level
= MLX5_MATCH_L2
;
1954 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_ETH_ADDRS
)) {
1955 struct flow_match_eth_addrs match
;
1957 flow_rule_match_eth_addrs(rule
, &match
);
1958 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
1961 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
1965 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
1968 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
1972 if (!is_zero_ether_addr(match
.mask
->src
) ||
1973 !is_zero_ether_addr(match
.mask
->dst
))
1974 *match_level
= MLX5_MATCH_L2
;
1977 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_CONTROL
)) {
1978 struct flow_match_control match
;
1980 flow_rule_match_control(rule
, &match
);
1981 addr_type
= match
.key
->addr_type
;
1983 /* the HW doesn't support frag first/later */
1984 if (match
.mask
->flags
& FLOW_DIS_FIRST_FRAG
)
1987 if (match
.mask
->flags
& FLOW_DIS_IS_FRAGMENT
) {
1988 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, frag
, 1);
1989 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, frag
,
1990 match
.key
->flags
& FLOW_DIS_IS_FRAGMENT
);
1992 /* the HW doesn't need L3 inline to match on frag=no */
1993 if (!(match
.key
->flags
& FLOW_DIS_IS_FRAGMENT
))
1994 *match_level
= MLX5_MATCH_L2
;
1995 /* *** L2 attributes parsing up to here *** */
1997 *match_level
= MLX5_MATCH_L3
;
2001 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_BASIC
)) {
2002 struct flow_match_basic match
;
2004 flow_rule_match_basic(rule
, &match
);
2005 ip_proto
= match
.key
->ip_proto
;
2007 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ip_protocol
,
2008 match
.mask
->ip_proto
);
2009 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ip_protocol
,
2010 match
.key
->ip_proto
);
2012 if (match
.mask
->ip_proto
)
2013 *match_level
= MLX5_MATCH_L3
;
2016 if (addr_type
== FLOW_DISSECTOR_KEY_IPV4_ADDRS
) {
2017 struct flow_match_ipv4_addrs match
;
2019 flow_rule_match_ipv4_addrs(rule
, &match
);
2020 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
2021 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
),
2022 &match
.mask
->src
, sizeof(match
.mask
->src
));
2023 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
2024 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
),
2025 &match
.key
->src
, sizeof(match
.key
->src
));
2026 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
2027 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
),
2028 &match
.mask
->dst
, sizeof(match
.mask
->dst
));
2029 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
2030 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
),
2031 &match
.key
->dst
, sizeof(match
.key
->dst
));
2033 if (match
.mask
->src
|| match
.mask
->dst
)
2034 *match_level
= MLX5_MATCH_L3
;
2037 if (addr_type
== FLOW_DISSECTOR_KEY_IPV6_ADDRS
) {
2038 struct flow_match_ipv6_addrs match
;
2040 flow_rule_match_ipv6_addrs(rule
, &match
);
2041 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
2042 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
),
2043 &match
.mask
->src
, sizeof(match
.mask
->src
));
2044 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
2045 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
),
2046 &match
.key
->src
, sizeof(match
.key
->src
));
2048 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
2049 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
),
2050 &match
.mask
->dst
, sizeof(match
.mask
->dst
));
2051 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
2052 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
),
2053 &match
.key
->dst
, sizeof(match
.key
->dst
));
2055 if (ipv6_addr_type(&match
.mask
->src
) != IPV6_ADDR_ANY
||
2056 ipv6_addr_type(&match
.mask
->dst
) != IPV6_ADDR_ANY
)
2057 *match_level
= MLX5_MATCH_L3
;
2060 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_IP
)) {
2061 struct flow_match_ip match
;
2063 flow_rule_match_ip(rule
, &match
);
2064 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ip_ecn
,
2065 match
.mask
->tos
& 0x3);
2066 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ip_ecn
,
2067 match
.key
->tos
& 0x3);
2069 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ip_dscp
,
2070 match
.mask
->tos
>> 2);
2071 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ip_dscp
,
2072 match
.key
->tos
>> 2);
2074 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ttl_hoplimit
,
2076 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ttl_hoplimit
,
2079 if (match
.mask
->ttl
&&
2080 !MLX5_CAP_ESW_FLOWTABLE_FDB(priv
->mdev
,
2081 ft_field_support
.outer_ipv4_ttl
)) {
2082 NL_SET_ERR_MSG_MOD(extack
,
2083 "Matching on TTL is not supported");
2087 if (match
.mask
->tos
|| match
.mask
->ttl
)
2088 *match_level
= MLX5_MATCH_L3
;
2091 /* *** L3 attributes parsing up to here *** */
2093 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_PORTS
)) {
2094 struct flow_match_ports match
;
2096 flow_rule_match_ports(rule
, &match
);
2099 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
2100 tcp_sport
, ntohs(match
.mask
->src
));
2101 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
2102 tcp_sport
, ntohs(match
.key
->src
));
2104 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
2105 tcp_dport
, ntohs(match
.mask
->dst
));
2106 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
2107 tcp_dport
, ntohs(match
.key
->dst
));
2111 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
2112 udp_sport
, ntohs(match
.mask
->src
));
2113 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
2114 udp_sport
, ntohs(match
.key
->src
));
2116 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
2117 udp_dport
, ntohs(match
.mask
->dst
));
2118 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
2119 udp_dport
, ntohs(match
.key
->dst
));
2122 NL_SET_ERR_MSG_MOD(extack
,
2123 "Only UDP and TCP transports are supported for L4 matching");
2124 netdev_err(priv
->netdev
,
2125 "Only UDP and TCP transport are supported\n");
2129 if (match
.mask
->src
|| match
.mask
->dst
)
2130 *match_level
= MLX5_MATCH_L4
;
2133 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_TCP
)) {
2134 struct flow_match_tcp match
;
2136 flow_rule_match_tcp(rule
, &match
);
2137 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, tcp_flags
,
2138 ntohs(match
.mask
->flags
));
2139 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, tcp_flags
,
2140 ntohs(match
.key
->flags
));
2142 if (match
.mask
->flags
)
2143 *match_level
= MLX5_MATCH_L4
;
2149 static int parse_cls_flower(struct mlx5e_priv
*priv
,
2150 struct mlx5e_tc_flow
*flow
,
2151 struct mlx5_flow_spec
*spec
,
2152 struct flow_cls_offload
*f
,
2153 struct net_device
*filter_dev
)
2155 u8 inner_match_level
, outer_match_level
, non_tunnel_match_level
;
2156 struct netlink_ext_ack
*extack
= f
->common
.extack
;
2157 struct mlx5_core_dev
*dev
= priv
->mdev
;
2158 struct mlx5_eswitch
*esw
= dev
->priv
.eswitch
;
2159 struct mlx5e_rep_priv
*rpriv
= priv
->ppriv
;
2160 struct mlx5_eswitch_rep
*rep
;
2161 bool is_eswitch_flow
;
2164 inner_match_level
= MLX5_MATCH_NONE
;
2165 outer_match_level
= MLX5_MATCH_NONE
;
2167 err
= __parse_cls_flower(priv
, spec
, f
, filter_dev
, &inner_match_level
,
2168 &outer_match_level
);
2169 non_tunnel_match_level
= (inner_match_level
== MLX5_MATCH_NONE
) ?
2170 outer_match_level
: inner_match_level
;
2172 is_eswitch_flow
= mlx5e_is_eswitch_flow(flow
);
2173 if (!err
&& is_eswitch_flow
) {
2175 if (rep
->vport
!= MLX5_VPORT_UPLINK
&&
2176 (esw
->offloads
.inline_mode
!= MLX5_INLINE_MODE_NONE
&&
2177 esw
->offloads
.inline_mode
< non_tunnel_match_level
)) {
2178 NL_SET_ERR_MSG_MOD(extack
,
2179 "Flow is not offloaded due to min inline setting");
2180 netdev_warn(priv
->netdev
,
2181 "Flow is not offloaded due to min inline setting, required %d actual %d\n",
2182 non_tunnel_match_level
, esw
->offloads
.inline_mode
);
2187 if (is_eswitch_flow
) {
2188 flow
->esw_attr
->inner_match_level
= inner_match_level
;
2189 flow
->esw_attr
->outer_match_level
= outer_match_level
;
2191 flow
->nic_attr
->match_level
= non_tunnel_match_level
;
2197 struct pedit_headers
{
2199 struct vlan_hdr vlan
;
2206 struct pedit_headers_action
{
2207 struct pedit_headers vals
;
2208 struct pedit_headers masks
;
2212 static int pedit_header_offsets
[] = {
2213 [FLOW_ACT_MANGLE_HDR_TYPE_ETH
] = offsetof(struct pedit_headers
, eth
),
2214 [FLOW_ACT_MANGLE_HDR_TYPE_IP4
] = offsetof(struct pedit_headers
, ip4
),
2215 [FLOW_ACT_MANGLE_HDR_TYPE_IP6
] = offsetof(struct pedit_headers
, ip6
),
2216 [FLOW_ACT_MANGLE_HDR_TYPE_TCP
] = offsetof(struct pedit_headers
, tcp
),
2217 [FLOW_ACT_MANGLE_HDR_TYPE_UDP
] = offsetof(struct pedit_headers
, udp
),
2220 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
2222 static int set_pedit_val(u8 hdr_type
, u32 mask
, u32 val
, u32 offset
,
2223 struct pedit_headers_action
*hdrs
)
2225 u32
*curr_pmask
, *curr_pval
;
2227 curr_pmask
= (u32
*)(pedit_header(&hdrs
->masks
, hdr_type
) + offset
);
2228 curr_pval
= (u32
*)(pedit_header(&hdrs
->vals
, hdr_type
) + offset
);
2230 if (*curr_pmask
& mask
) /* disallow acting twice on the same location */
2233 *curr_pmask
|= mask
;
2234 *curr_pval
|= (val
& mask
);
2242 struct mlx5_fields
{
2249 #define OFFLOAD(fw_field, size, field, off, match_field) \
2250 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, \
2251 offsetof(struct pedit_headers, field) + (off), \
2252 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
2254 /* masked values are the same and there are no rewrites that do not have a
2257 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2258 type matchmaskx = *(type *)(matchmaskp); \
2259 type matchvalx = *(type *)(matchvalp); \
2260 type maskx = *(type *)(maskp); \
2261 type valx = *(type *)(valp); \
2263 (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2267 static bool cmp_val_mask(void *valp
, void *maskp
, void *matchvalp
,
2268 void *matchmaskp
, int size
)
2274 same
= SAME_VAL_MASK(u8
, valp
, maskp
, matchvalp
, matchmaskp
);
2277 same
= SAME_VAL_MASK(u16
, valp
, maskp
, matchvalp
, matchmaskp
);
2280 same
= SAME_VAL_MASK(u32
, valp
, maskp
, matchvalp
, matchmaskp
);
2287 static struct mlx5_fields fields
[] = {
2288 OFFLOAD(DMAC_47_16
, 4, eth
.h_dest
[0], 0, dmac_47_16
),
2289 OFFLOAD(DMAC_15_0
, 2, eth
.h_dest
[4], 0, dmac_15_0
),
2290 OFFLOAD(SMAC_47_16
, 4, eth
.h_source
[0], 0, smac_47_16
),
2291 OFFLOAD(SMAC_15_0
, 2, eth
.h_source
[4], 0, smac_15_0
),
2292 OFFLOAD(ETHERTYPE
, 2, eth
.h_proto
, 0, ethertype
),
2293 OFFLOAD(FIRST_VID
, 2, vlan
.h_vlan_TCI
, 0, first_vid
),
2295 OFFLOAD(IP_TTL
, 1, ip4
.ttl
, 0, ttl_hoplimit
),
2296 OFFLOAD(SIPV4
, 4, ip4
.saddr
, 0, src_ipv4_src_ipv6
.ipv4_layout
.ipv4
),
2297 OFFLOAD(DIPV4
, 4, ip4
.daddr
, 0, dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
),
2299 OFFLOAD(SIPV6_127_96
, 4, ip6
.saddr
.s6_addr32
[0], 0,
2300 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
[0]),
2301 OFFLOAD(SIPV6_95_64
, 4, ip6
.saddr
.s6_addr32
[1], 0,
2302 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
[4]),
2303 OFFLOAD(SIPV6_63_32
, 4, ip6
.saddr
.s6_addr32
[2], 0,
2304 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
[8]),
2305 OFFLOAD(SIPV6_31_0
, 4, ip6
.saddr
.s6_addr32
[3], 0,
2306 src_ipv4_src_ipv6
.ipv6_layout
.ipv6
[12]),
2307 OFFLOAD(DIPV6_127_96
, 4, ip6
.daddr
.s6_addr32
[0], 0,
2308 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
[0]),
2309 OFFLOAD(DIPV6_95_64
, 4, ip6
.daddr
.s6_addr32
[1], 0,
2310 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
[4]),
2311 OFFLOAD(DIPV6_63_32
, 4, ip6
.daddr
.s6_addr32
[2], 0,
2312 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
[8]),
2313 OFFLOAD(DIPV6_31_0
, 4, ip6
.daddr
.s6_addr32
[3], 0,
2314 dst_ipv4_dst_ipv6
.ipv6_layout
.ipv6
[12]),
2315 OFFLOAD(IPV6_HOPLIMIT
, 1, ip6
.hop_limit
, 0, ttl_hoplimit
),
2317 OFFLOAD(TCP_SPORT
, 2, tcp
.source
, 0, tcp_sport
),
2318 OFFLOAD(TCP_DPORT
, 2, tcp
.dest
, 0, tcp_dport
),
2319 OFFLOAD(TCP_FLAGS
, 1, tcp
.ack_seq
, 5, tcp_flags
),
2321 OFFLOAD(UDP_SPORT
, 2, udp
.source
, 0, udp_sport
),
2322 OFFLOAD(UDP_DPORT
, 2, udp
.dest
, 0, udp_dport
),
2325 /* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at
2326 * max from the SW pedit action. On success, attr->num_mod_hdr_actions
2327 * says how many HW actions were actually parsed.
2329 static int offload_pedit_fields(struct pedit_headers_action
*hdrs
,
2330 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
2332 struct netlink_ext_ack
*extack
)
2334 struct pedit_headers
*set_masks
, *add_masks
, *set_vals
, *add_vals
;
2335 void *headers_c
= get_match_headers_criteria(*action_flags
,
2337 void *headers_v
= get_match_headers_value(*action_flags
,
2339 int i
, action_size
, nactions
, max_actions
, first
, last
, next_z
;
2340 void *s_masks_p
, *a_masks_p
, *vals_p
;
2341 struct mlx5_fields
*f
;
2342 u8 cmd
, field_bsize
;
2349 set_masks
= &hdrs
[0].masks
;
2350 add_masks
= &hdrs
[1].masks
;
2351 set_vals
= &hdrs
[0].vals
;
2352 add_vals
= &hdrs
[1].vals
;
2354 action_size
= MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto
);
2355 action
= parse_attr
->mod_hdr_actions
+
2356 parse_attr
->num_mod_hdr_actions
* action_size
;
2358 max_actions
= parse_attr
->max_mod_hdr_actions
;
2359 nactions
= parse_attr
->num_mod_hdr_actions
;
2361 for (i
= 0; i
< ARRAY_SIZE(fields
); i
++) {
2365 /* avoid seeing bits set from previous iterations */
2369 s_masks_p
= (void *)set_masks
+ f
->offset
;
2370 a_masks_p
= (void *)add_masks
+ f
->offset
;
2372 memcpy(&s_mask
, s_masks_p
, f
->size
);
2373 memcpy(&a_mask
, a_masks_p
, f
->size
);
2375 if (!s_mask
&& !a_mask
) /* nothing to offload here */
2378 if (s_mask
&& a_mask
) {
2379 NL_SET_ERR_MSG_MOD(extack
,
2380 "can't set and add to the same HW field");
2381 printk(KERN_WARNING
"mlx5: can't set and add to the same HW field (%x)\n", f
->field
);
2385 if (nactions
== max_actions
) {
2386 NL_SET_ERR_MSG_MOD(extack
,
2387 "too many pedit actions, can't offload");
2388 printk(KERN_WARNING
"mlx5: parsed %d pedit actions, can't do more\n", nactions
);
2394 void *match_mask
= headers_c
+ f
->match_offset
;
2395 void *match_val
= headers_v
+ f
->match_offset
;
2397 cmd
= MLX5_ACTION_TYPE_SET
;
2399 vals_p
= (void *)set_vals
+ f
->offset
;
2400 /* don't rewrite if we have a match on the same value */
2401 if (cmp_val_mask(vals_p
, s_masks_p
, match_val
,
2402 match_mask
, f
->size
))
2404 /* clear to denote we consumed this field */
2405 memset(s_masks_p
, 0, f
->size
);
2409 cmd
= MLX5_ACTION_TYPE_ADD
;
2411 vals_p
= (void *)add_vals
+ f
->offset
;
2412 /* add 0 is no change */
2413 if (!memcmp(vals_p
, &zero
, f
->size
))
2415 /* clear to denote we consumed this field */
2416 memset(a_masks_p
, 0, f
->size
);
2421 field_bsize
= f
->size
* BITS_PER_BYTE
;
2423 if (field_bsize
== 32) {
2424 mask_be32
= *(__be32
*)&mask
;
2425 mask
= (__force
unsigned long)cpu_to_le32(be32_to_cpu(mask_be32
));
2426 } else if (field_bsize
== 16) {
2427 mask_be16
= *(__be16
*)&mask
;
2428 mask
= (__force
unsigned long)cpu_to_le16(be16_to_cpu(mask_be16
));
2431 first
= find_first_bit(&mask
, field_bsize
);
2432 next_z
= find_next_zero_bit(&mask
, field_bsize
, first
);
2433 last
= find_last_bit(&mask
, field_bsize
);
2434 if (first
< next_z
&& next_z
< last
) {
2435 NL_SET_ERR_MSG_MOD(extack
,
2436 "rewrite of few sub-fields isn't supported");
2437 printk(KERN_WARNING
"mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
2442 MLX5_SET(set_action_in
, action
, action_type
, cmd
);
2443 MLX5_SET(set_action_in
, action
, field
, f
->field
);
2445 if (cmd
== MLX5_ACTION_TYPE_SET
) {
2446 MLX5_SET(set_action_in
, action
, offset
, first
);
2447 /* length is num of bits to be written, zero means length of 32 */
2448 MLX5_SET(set_action_in
, action
, length
, (last
- first
+ 1));
2451 if (field_bsize
== 32)
2452 MLX5_SET(set_action_in
, action
, data
, ntohl(*(__be32
*)vals_p
) >> first
);
2453 else if (field_bsize
== 16)
2454 MLX5_SET(set_action_in
, action
, data
, ntohs(*(__be16
*)vals_p
) >> first
);
2455 else if (field_bsize
== 8)
2456 MLX5_SET(set_action_in
, action
, data
, *(u8
*)vals_p
>> first
);
2458 action
+= action_size
;
2462 parse_attr
->num_mod_hdr_actions
= nactions
;
2466 static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev
*mdev
,
2469 if (namespace == MLX5_FLOW_NAMESPACE_FDB
) /* FDB offloading */
2470 return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev
, max_modify_header_actions
);
2471 else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
2472 return MLX5_CAP_FLOWTABLE_NIC_RX(mdev
, max_modify_header_actions
);
2475 static int alloc_mod_hdr_actions(struct mlx5e_priv
*priv
,
2476 struct pedit_headers_action
*hdrs
,
2478 struct mlx5e_tc_flow_parse_attr
*parse_attr
)
2480 int nkeys
, action_size
, max_actions
;
2482 nkeys
= hdrs
[TCA_PEDIT_KEY_EX_CMD_SET
].pedits
+
2483 hdrs
[TCA_PEDIT_KEY_EX_CMD_ADD
].pedits
;
2484 action_size
= MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto
);
2486 max_actions
= mlx5e_flow_namespace_max_modify_action(priv
->mdev
, namespace);
2487 /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */
2488 max_actions
= min(max_actions
, nkeys
* 16);
2490 parse_attr
->mod_hdr_actions
= kcalloc(max_actions
, action_size
, GFP_KERNEL
);
2491 if (!parse_attr
->mod_hdr_actions
)
2494 parse_attr
->max_mod_hdr_actions
= max_actions
;
2498 static const struct pedit_headers zero_masks
= {};
2500 static int parse_tc_pedit_action(struct mlx5e_priv
*priv
,
2501 const struct flow_action_entry
*act
, int namespace,
2502 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
2503 struct pedit_headers_action
*hdrs
,
2504 struct netlink_ext_ack
*extack
)
2506 u8 cmd
= (act
->id
== FLOW_ACTION_MANGLE
) ? 0 : 1;
2507 int err
= -EOPNOTSUPP
;
2508 u32 mask
, val
, offset
;
2511 htype
= act
->mangle
.htype
;
2512 err
= -EOPNOTSUPP
; /* can't be all optimistic */
2514 if (htype
== FLOW_ACT_MANGLE_UNSPEC
) {
2515 NL_SET_ERR_MSG_MOD(extack
, "legacy pedit isn't offloaded");
2519 if (!mlx5e_flow_namespace_max_modify_action(priv
->mdev
, namespace)) {
2520 NL_SET_ERR_MSG_MOD(extack
,
2521 "The pedit offload action is not supported");
2525 mask
= act
->mangle
.mask
;
2526 val
= act
->mangle
.val
;
2527 offset
= act
->mangle
.offset
;
2529 err
= set_pedit_val(htype
, ~mask
, val
, offset
, &hdrs
[cmd
]);
2540 static int alloc_tc_pedit_action(struct mlx5e_priv
*priv
, int namespace,
2541 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
2542 struct pedit_headers_action
*hdrs
,
2544 struct netlink_ext_ack
*extack
)
2546 struct pedit_headers
*cmd_masks
;
2550 if (!parse_attr
->mod_hdr_actions
) {
2551 err
= alloc_mod_hdr_actions(priv
, hdrs
, namespace, parse_attr
);
2556 err
= offload_pedit_fields(hdrs
, parse_attr
, action_flags
, extack
);
2558 goto out_dealloc_parsed_actions
;
2560 for (cmd
= 0; cmd
< __PEDIT_CMD_MAX
; cmd
++) {
2561 cmd_masks
= &hdrs
[cmd
].masks
;
2562 if (memcmp(cmd_masks
, &zero_masks
, sizeof(zero_masks
))) {
2563 NL_SET_ERR_MSG_MOD(extack
,
2564 "attempt to offload an unsupported field");
2565 netdev_warn(priv
->netdev
, "attempt to offload an unsupported field (cmd %d)\n", cmd
);
2566 print_hex_dump(KERN_WARNING
, "mask: ", DUMP_PREFIX_ADDRESS
,
2567 16, 1, cmd_masks
, sizeof(zero_masks
), true);
2569 goto out_dealloc_parsed_actions
;
2575 out_dealloc_parsed_actions
:
2576 kfree(parse_attr
->mod_hdr_actions
);
2581 static bool csum_offload_supported(struct mlx5e_priv
*priv
,
2584 struct netlink_ext_ack
*extack
)
2586 u32 prot_flags
= TCA_CSUM_UPDATE_FLAG_IPV4HDR
| TCA_CSUM_UPDATE_FLAG_TCP
|
2587 TCA_CSUM_UPDATE_FLAG_UDP
;
2589 /* The HW recalcs checksums only if re-writing headers */
2590 if (!(action
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)) {
2591 NL_SET_ERR_MSG_MOD(extack
,
2592 "TC csum action is only offloaded with pedit");
2593 netdev_warn(priv
->netdev
,
2594 "TC csum action is only offloaded with pedit\n");
2598 if (update_flags
& ~prot_flags
) {
2599 NL_SET_ERR_MSG_MOD(extack
,
2600 "can't offload TC csum action for some header/s");
2601 netdev_warn(priv
->netdev
,
2602 "can't offload TC csum action for some header/s - flags %#x\n",
2610 struct ip_ttl_word
{
2616 struct ipv6_hoplimit_word
{
2622 static bool is_action_keys_supported(const struct flow_action_entry
*act
)
2627 htype
= act
->mangle
.htype
;
2628 offset
= act
->mangle
.offset
;
2629 mask
= ~act
->mangle
.mask
;
2630 /* For IPv4 & IPv6 header check 4 byte word,
2631 * to determine that modified fields
2632 * are NOT ttl & hop_limit only.
2634 if (htype
== FLOW_ACT_MANGLE_HDR_TYPE_IP4
) {
2635 struct ip_ttl_word
*ttl_word
=
2636 (struct ip_ttl_word
*)&mask
;
2638 if (offset
!= offsetof(struct iphdr
, ttl
) ||
2639 ttl_word
->protocol
||
2643 } else if (htype
== FLOW_ACT_MANGLE_HDR_TYPE_IP6
) {
2644 struct ipv6_hoplimit_word
*hoplimit_word
=
2645 (struct ipv6_hoplimit_word
*)&mask
;
2647 if (offset
!= offsetof(struct ipv6hdr
, payload_len
) ||
2648 hoplimit_word
->payload_len
||
2649 hoplimit_word
->nexthdr
) {
2656 static bool modify_header_match_supported(struct mlx5_flow_spec
*spec
,
2657 struct flow_action
*flow_action
,
2659 struct netlink_ext_ack
*extack
)
2661 const struct flow_action_entry
*act
;
2662 bool modify_ip_header
;
2668 headers_v
= get_match_headers_value(actions
, spec
);
2669 ethertype
= MLX5_GET(fte_match_set_lyr_2_4
, headers_v
, ethertype
);
2671 /* for non-IP we only re-write MACs, so we're okay */
2672 if (ethertype
!= ETH_P_IP
&& ethertype
!= ETH_P_IPV6
)
2675 modify_ip_header
= false;
2676 flow_action_for_each(i
, act
, flow_action
) {
2677 if (act
->id
!= FLOW_ACTION_MANGLE
&&
2678 act
->id
!= FLOW_ACTION_ADD
)
2681 if (is_action_keys_supported(act
)) {
2682 modify_ip_header
= true;
2687 ip_proto
= MLX5_GET(fte_match_set_lyr_2_4
, headers_v
, ip_protocol
);
2688 if (modify_ip_header
&& ip_proto
!= IPPROTO_TCP
&&
2689 ip_proto
!= IPPROTO_UDP
&& ip_proto
!= IPPROTO_ICMP
) {
2690 NL_SET_ERR_MSG_MOD(extack
,
2691 "can't offload re-write of non TCP/UDP");
2692 pr_info("can't offload re-write of ip proto %d\n", ip_proto
);
2700 static bool actions_match_supported(struct mlx5e_priv
*priv
,
2701 struct flow_action
*flow_action
,
2702 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
2703 struct mlx5e_tc_flow
*flow
,
2704 struct netlink_ext_ack
*extack
)
2708 if (mlx5e_is_eswitch_flow(flow
))
2709 actions
= flow
->esw_attr
->action
;
2711 actions
= flow
->nic_attr
->action
;
2713 if (flow_flag_test(flow
, EGRESS
) &&
2714 !((actions
& MLX5_FLOW_CONTEXT_ACTION_DECAP
) ||
2715 (actions
& MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
) ||
2716 (actions
& MLX5_FLOW_CONTEXT_ACTION_DROP
)))
2719 if (actions
& MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
)
2720 return modify_header_match_supported(&parse_attr
->spec
,
2721 flow_action
, actions
,
2727 static bool same_hw_devs(struct mlx5e_priv
*priv
, struct mlx5e_priv
*peer_priv
)
2729 struct mlx5_core_dev
*fmdev
, *pmdev
;
2730 u64 fsystem_guid
, psystem_guid
;
2733 pmdev
= peer_priv
->mdev
;
2735 fsystem_guid
= mlx5_query_nic_system_image_guid(fmdev
);
2736 psystem_guid
= mlx5_query_nic_system_image_guid(pmdev
);
2738 return (fsystem_guid
== psystem_guid
);
2741 static int add_vlan_rewrite_action(struct mlx5e_priv
*priv
, int namespace,
2742 const struct flow_action_entry
*act
,
2743 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
2744 struct pedit_headers_action
*hdrs
,
2745 u32
*action
, struct netlink_ext_ack
*extack
)
2747 u16 mask16
= VLAN_VID_MASK
;
2748 u16 val16
= act
->vlan
.vid
& VLAN_VID_MASK
;
2749 const struct flow_action_entry pedit_act
= {
2750 .id
= FLOW_ACTION_MANGLE
,
2751 .mangle
.htype
= FLOW_ACT_MANGLE_HDR_TYPE_ETH
,
2752 .mangle
.offset
= offsetof(struct vlan_ethhdr
, h_vlan_TCI
),
2753 .mangle
.mask
= ~(u32
)be16_to_cpu(*(__be16
*)&mask16
),
2754 .mangle
.val
= (u32
)be16_to_cpu(*(__be16
*)&val16
),
2756 u8 match_prio_mask
, match_prio_val
;
2757 void *headers_c
, *headers_v
;
2760 headers_c
= get_match_headers_criteria(*action
, &parse_attr
->spec
);
2761 headers_v
= get_match_headers_value(*action
, &parse_attr
->spec
);
2763 if (!(MLX5_GET(fte_match_set_lyr_2_4
, headers_c
, cvlan_tag
) &&
2764 MLX5_GET(fte_match_set_lyr_2_4
, headers_v
, cvlan_tag
))) {
2765 NL_SET_ERR_MSG_MOD(extack
,
2766 "VLAN rewrite action must have VLAN protocol match");
2770 match_prio_mask
= MLX5_GET(fte_match_set_lyr_2_4
, headers_c
, first_prio
);
2771 match_prio_val
= MLX5_GET(fte_match_set_lyr_2_4
, headers_v
, first_prio
);
2772 if (act
->vlan
.prio
!= (match_prio_val
& match_prio_mask
)) {
2773 NL_SET_ERR_MSG_MOD(extack
,
2774 "Changing VLAN prio is not supported");
2778 err
= parse_tc_pedit_action(priv
, &pedit_act
, namespace, parse_attr
,
2780 *action
|= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
;
2786 add_vlan_prio_tag_rewrite_action(struct mlx5e_priv
*priv
,
2787 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
2788 struct pedit_headers_action
*hdrs
,
2789 u32
*action
, struct netlink_ext_ack
*extack
)
2791 const struct flow_action_entry prio_tag_act
= {
2794 MLX5_GET(fte_match_set_lyr_2_4
,
2795 get_match_headers_value(*action
,
2798 MLX5_GET(fte_match_set_lyr_2_4
,
2799 get_match_headers_criteria(*action
,
2804 return add_vlan_rewrite_action(priv
, MLX5_FLOW_NAMESPACE_FDB
,
2805 &prio_tag_act
, parse_attr
, hdrs
, action
,
2809 static int parse_tc_nic_actions(struct mlx5e_priv
*priv
,
2810 struct flow_action
*flow_action
,
2811 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
2812 struct mlx5e_tc_flow
*flow
,
2813 struct netlink_ext_ack
*extack
)
2815 struct mlx5_nic_flow_attr
*attr
= flow
->nic_attr
;
2816 struct pedit_headers_action hdrs
[2] = {};
2817 const struct flow_action_entry
*act
;
2821 if (!flow_action_has_entries(flow_action
))
2824 attr
->flow_tag
= MLX5_FS_DEFAULT_FLOW_TAG
;
2826 flow_action_for_each(i
, act
, flow_action
) {
2828 case FLOW_ACTION_DROP
:
2829 action
|= MLX5_FLOW_CONTEXT_ACTION_DROP
;
2830 if (MLX5_CAP_FLOWTABLE(priv
->mdev
,
2831 flow_table_properties_nic_receive
.flow_counter
))
2832 action
|= MLX5_FLOW_CONTEXT_ACTION_COUNT
;
2834 case FLOW_ACTION_MANGLE
:
2835 case FLOW_ACTION_ADD
:
2836 err
= parse_tc_pedit_action(priv
, act
, MLX5_FLOW_NAMESPACE_KERNEL
,
2837 parse_attr
, hdrs
, extack
);
2841 action
|= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
|
2842 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
2844 case FLOW_ACTION_VLAN_MANGLE
:
2845 err
= add_vlan_rewrite_action(priv
,
2846 MLX5_FLOW_NAMESPACE_KERNEL
,
2847 act
, parse_attr
, hdrs
,
2853 case FLOW_ACTION_CSUM
:
2854 if (csum_offload_supported(priv
, action
,
2860 case FLOW_ACTION_REDIRECT
: {
2861 struct net_device
*peer_dev
= act
->dev
;
2863 if (priv
->netdev
->netdev_ops
== peer_dev
->netdev_ops
&&
2864 same_hw_devs(priv
, netdev_priv(peer_dev
))) {
2865 parse_attr
->mirred_ifindex
[0] = peer_dev
->ifindex
;
2866 flow_flag_set(flow
, HAIRPIN
);
2867 action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
|
2868 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
2870 NL_SET_ERR_MSG_MOD(extack
,
2871 "device is not on same HW, can't offload");
2872 netdev_warn(priv
->netdev
, "device %s not on same HW, can't offload\n",
2878 case FLOW_ACTION_MARK
: {
2879 u32 mark
= act
->mark
;
2881 if (mark
& ~MLX5E_TC_FLOW_ID_MASK
) {
2882 NL_SET_ERR_MSG_MOD(extack
,
2883 "Bad flow mark - only 16 bit is supported");
2887 attr
->flow_tag
= mark
;
2888 action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
2892 NL_SET_ERR_MSG_MOD(extack
, "The offload action is not supported");
2897 if (hdrs
[TCA_PEDIT_KEY_EX_CMD_SET
].pedits
||
2898 hdrs
[TCA_PEDIT_KEY_EX_CMD_ADD
].pedits
) {
2899 err
= alloc_tc_pedit_action(priv
, MLX5_FLOW_NAMESPACE_KERNEL
,
2900 parse_attr
, hdrs
, &action
, extack
);
2903 /* in case all pedit actions are skipped, remove the MOD_HDR
2906 if (parse_attr
->num_mod_hdr_actions
== 0) {
2907 action
&= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
;
2908 kfree(parse_attr
->mod_hdr_actions
);
2912 attr
->action
= action
;
2913 if (!actions_match_supported(priv
, flow_action
, parse_attr
, flow
, extack
))
2920 const struct ip_tunnel_key
*ip_tun_key
;
2921 struct mlx5e_tc_tunnel
*tc_tunnel
;
2924 static inline int cmp_encap_info(struct encap_key
*a
,
2925 struct encap_key
*b
)
2927 return memcmp(a
->ip_tun_key
, b
->ip_tun_key
, sizeof(*a
->ip_tun_key
)) ||
2928 a
->tc_tunnel
->tunnel_type
!= b
->tc_tunnel
->tunnel_type
;
2931 static inline int hash_encap_info(struct encap_key
*key
)
2933 return jhash(key
->ip_tun_key
, sizeof(*key
->ip_tun_key
),
2934 key
->tc_tunnel
->tunnel_type
);
2938 static bool is_merged_eswitch_dev(struct mlx5e_priv
*priv
,
2939 struct net_device
*peer_netdev
)
2941 struct mlx5e_priv
*peer_priv
;
2943 peer_priv
= netdev_priv(peer_netdev
);
2945 return (MLX5_CAP_ESW(priv
->mdev
, merged_eswitch
) &&
2946 mlx5e_eswitch_rep(priv
->netdev
) &&
2947 mlx5e_eswitch_rep(peer_netdev
) &&
2948 same_hw_devs(priv
, peer_priv
));
2953 bool mlx5e_encap_take(struct mlx5e_encap_entry
*e
)
2955 return refcount_inc_not_zero(&e
->refcnt
);
2958 static struct mlx5e_encap_entry
*
2959 mlx5e_encap_get(struct mlx5e_priv
*priv
, struct encap_key
*key
,
2962 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
2963 struct mlx5e_encap_entry
*e
;
2964 struct encap_key e_key
;
2966 hash_for_each_possible_rcu(esw
->offloads
.encap_tbl
, e
,
2967 encap_hlist
, hash_key
) {
2968 e_key
.ip_tun_key
= &e
->tun_info
->key
;
2969 e_key
.tc_tunnel
= e
->tunnel
;
2970 if (!cmp_encap_info(&e_key
, key
) &&
2971 mlx5e_encap_take(e
))
2978 static struct ip_tunnel_info
*dup_tun_info(const struct ip_tunnel_info
*tun_info
)
2980 size_t tun_size
= sizeof(*tun_info
) + tun_info
->options_len
;
2982 return kmemdup(tun_info
, tun_size
, GFP_KERNEL
);
2985 static int mlx5e_attach_encap(struct mlx5e_priv
*priv
,
2986 struct mlx5e_tc_flow
*flow
,
2987 struct net_device
*mirred_dev
,
2989 struct netlink_ext_ack
*extack
,
2990 struct net_device
**encap_dev
,
2993 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
2994 struct mlx5_esw_flow_attr
*attr
= flow
->esw_attr
;
2995 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
2996 const struct ip_tunnel_info
*tun_info
;
2997 struct encap_key key
;
2998 struct mlx5e_encap_entry
*e
;
2999 unsigned short family
;
3003 parse_attr
= attr
->parse_attr
;
3004 tun_info
= parse_attr
->tun_info
[out_index
];
3005 family
= ip_tunnel_info_af(tun_info
);
3006 key
.ip_tun_key
= &tun_info
->key
;
3007 key
.tc_tunnel
= mlx5e_get_tc_tun(mirred_dev
);
3008 if (!key
.tc_tunnel
) {
3009 NL_SET_ERR_MSG_MOD(extack
, "Unsupported tunnel");
3013 hash_key
= hash_encap_info(&key
);
3015 mutex_lock(&esw
->offloads
.encap_tbl_lock
);
3016 e
= mlx5e_encap_get(priv
, &key
, hash_key
);
3018 /* must verify if encap is valid or not */
3020 mutex_unlock(&esw
->offloads
.encap_tbl_lock
);
3021 wait_for_completion(&e
->res_ready
);
3023 /* Protect against concurrent neigh update. */
3024 mutex_lock(&esw
->offloads
.encap_tbl_lock
);
3025 if (e
->compl_result
< 0) {
3032 e
= kzalloc(sizeof(*e
), GFP_KERNEL
);
3038 refcount_set(&e
->refcnt
, 1);
3039 init_completion(&e
->res_ready
);
3041 tun_info
= dup_tun_info(tun_info
);
3046 e
->tun_info
= tun_info
;
3047 err
= mlx5e_tc_tun_init_encap_attr(mirred_dev
, priv
, e
, extack
);
3051 INIT_LIST_HEAD(&e
->flows
);
3052 hash_add_rcu(esw
->offloads
.encap_tbl
, &e
->encap_hlist
, hash_key
);
3053 mutex_unlock(&esw
->offloads
.encap_tbl_lock
);
3055 if (family
== AF_INET
)
3056 err
= mlx5e_tc_tun_create_header_ipv4(priv
, mirred_dev
, e
);
3057 else if (family
== AF_INET6
)
3058 err
= mlx5e_tc_tun_create_header_ipv6(priv
, mirred_dev
, e
);
3060 /* Protect against concurrent neigh update. */
3061 mutex_lock(&esw
->offloads
.encap_tbl_lock
);
3062 complete_all(&e
->res_ready
);
3064 e
->compl_result
= err
;
3067 e
->compl_result
= 1;
3070 flow
->encaps
[out_index
].e
= e
;
3071 list_add(&flow
->encaps
[out_index
].list
, &e
->flows
);
3072 flow
->encaps
[out_index
].index
= out_index
;
3073 *encap_dev
= e
->out_dev
;
3074 if (e
->flags
& MLX5_ENCAP_ENTRY_VALID
) {
3075 attr
->dests
[out_index
].pkt_reformat
= e
->pkt_reformat
;
3076 attr
->dests
[out_index
].flags
|= MLX5_ESW_DEST_ENCAP_VALID
;
3077 *encap_valid
= true;
3079 *encap_valid
= false;
3081 mutex_unlock(&esw
->offloads
.encap_tbl_lock
);
3086 mutex_unlock(&esw
->offloads
.encap_tbl_lock
);
3088 mlx5e_encap_put(priv
, e
);
3092 mutex_unlock(&esw
->offloads
.encap_tbl_lock
);
3098 static int parse_tc_vlan_action(struct mlx5e_priv
*priv
,
3099 const struct flow_action_entry
*act
,
3100 struct mlx5_esw_flow_attr
*attr
,
3103 u8 vlan_idx
= attr
->total_vlan
;
3105 if (vlan_idx
>= MLX5_FS_VLAN_DEPTH
)
3109 case FLOW_ACTION_VLAN_POP
:
3111 if (!mlx5_eswitch_vlan_actions_supported(priv
->mdev
,
3112 MLX5_FS_VLAN_DEPTH
))
3115 *action
|= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2
;
3117 *action
|= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
;
3120 case FLOW_ACTION_VLAN_PUSH
:
3121 attr
->vlan_vid
[vlan_idx
] = act
->vlan
.vid
;
3122 attr
->vlan_prio
[vlan_idx
] = act
->vlan
.prio
;
3123 attr
->vlan_proto
[vlan_idx
] = act
->vlan
.proto
;
3124 if (!attr
->vlan_proto
[vlan_idx
])
3125 attr
->vlan_proto
[vlan_idx
] = htons(ETH_P_8021Q
);
3128 if (!mlx5_eswitch_vlan_actions_supported(priv
->mdev
,
3129 MLX5_FS_VLAN_DEPTH
))
3132 *action
|= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2
;
3134 if (!mlx5_eswitch_vlan_actions_supported(priv
->mdev
, 1) &&
3135 (act
->vlan
.proto
!= htons(ETH_P_8021Q
) ||
3139 *action
|= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH
;
3146 attr
->total_vlan
= vlan_idx
+ 1;
3151 static int add_vlan_push_action(struct mlx5e_priv
*priv
,
3152 struct mlx5_esw_flow_attr
*attr
,
3153 struct net_device
**out_dev
,
3156 struct net_device
*vlan_dev
= *out_dev
;
3157 struct flow_action_entry vlan_act
= {
3158 .id
= FLOW_ACTION_VLAN_PUSH
,
3159 .vlan
.vid
= vlan_dev_vlan_id(vlan_dev
),
3160 .vlan
.proto
= vlan_dev_vlan_proto(vlan_dev
),
3165 err
= parse_tc_vlan_action(priv
, &vlan_act
, attr
, action
);
3169 *out_dev
= dev_get_by_index_rcu(dev_net(vlan_dev
),
3170 dev_get_iflink(vlan_dev
));
3171 if (is_vlan_dev(*out_dev
))
3172 err
= add_vlan_push_action(priv
, attr
, out_dev
, action
);
3177 static int add_vlan_pop_action(struct mlx5e_priv
*priv
,
3178 struct mlx5_esw_flow_attr
*attr
,
3181 int nest_level
= attr
->parse_attr
->filter_dev
->lower_level
;
3182 struct flow_action_entry vlan_act
= {
3183 .id
= FLOW_ACTION_VLAN_POP
,
3187 while (nest_level
--) {
3188 err
= parse_tc_vlan_action(priv
, &vlan_act
, attr
, action
);
3196 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv
*priv
,
3197 struct net_device
*out_dev
)
3199 if (is_merged_eswitch_dev(priv
, out_dev
))
3202 return mlx5e_eswitch_rep(out_dev
) &&
3203 same_hw_devs(priv
, netdev_priv(out_dev
));
3206 static int parse_tc_fdb_actions(struct mlx5e_priv
*priv
,
3207 struct flow_action
*flow_action
,
3208 struct mlx5e_tc_flow
*flow
,
3209 struct netlink_ext_ack
*extack
)
3211 struct pedit_headers_action hdrs
[2] = {};
3212 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
3213 struct mlx5_esw_flow_attr
*attr
= flow
->esw_attr
;
3214 struct mlx5e_tc_flow_parse_attr
*parse_attr
= attr
->parse_attr
;
3215 struct mlx5e_rep_priv
*rpriv
= priv
->ppriv
;
3216 const struct ip_tunnel_info
*info
= NULL
;
3217 const struct flow_action_entry
*act
;
3222 if (!flow_action_has_entries(flow_action
))
3225 flow_action_for_each(i
, act
, flow_action
) {
3227 case FLOW_ACTION_DROP
:
3228 action
|= MLX5_FLOW_CONTEXT_ACTION_DROP
|
3229 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
3231 case FLOW_ACTION_MANGLE
:
3232 case FLOW_ACTION_ADD
:
3233 err
= parse_tc_pedit_action(priv
, act
, MLX5_FLOW_NAMESPACE_FDB
,
3234 parse_attr
, hdrs
, extack
);
3238 action
|= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
;
3239 attr
->split_count
= attr
->out_count
;
3241 case FLOW_ACTION_CSUM
:
3242 if (csum_offload_supported(priv
, action
,
3243 act
->csum_flags
, extack
))
3247 case FLOW_ACTION_REDIRECT
:
3248 case FLOW_ACTION_MIRRED
: {
3249 struct mlx5e_priv
*out_priv
;
3250 struct net_device
*out_dev
;
3254 /* out_dev is NULL when filters with
3255 * non-existing mirred device are replayed to
3261 if (attr
->out_count
>= MLX5_MAX_FLOW_FWD_VPORTS
) {
3262 NL_SET_ERR_MSG_MOD(extack
,
3263 "can't support more output ports, can't offload forwarding");
3264 pr_err("can't support more than %d output ports, can't offload forwarding\n",
3269 action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
|
3270 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
3272 parse_attr
->mirred_ifindex
[attr
->out_count
] =
3274 parse_attr
->tun_info
[attr
->out_count
] = dup_tun_info(info
);
3275 if (!parse_attr
->tun_info
[attr
->out_count
])
3278 attr
->dests
[attr
->out_count
].flags
|=
3279 MLX5_ESW_DEST_ENCAP
;
3281 /* attr->dests[].rep is resolved when we
3284 } else if (netdev_port_same_parent_id(priv
->netdev
, out_dev
)) {
3285 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
3286 struct net_device
*uplink_dev
= mlx5_eswitch_uplink_get_proto_dev(esw
, REP_ETH
);
3287 struct net_device
*uplink_upper
;
3291 netdev_master_upper_dev_get_rcu(uplink_dev
);
3293 netif_is_lag_master(uplink_upper
) &&
3294 uplink_upper
== out_dev
)
3295 out_dev
= uplink_dev
;
3298 if (is_vlan_dev(out_dev
)) {
3299 err
= add_vlan_push_action(priv
, attr
,
3306 if (is_vlan_dev(parse_attr
->filter_dev
)) {
3307 err
= add_vlan_pop_action(priv
, attr
,
3313 if (!mlx5e_is_valid_eswitch_fwd_dev(priv
, out_dev
)) {
3314 NL_SET_ERR_MSG_MOD(extack
,
3315 "devices are not on same switch HW, can't offload forwarding");
3316 pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
3317 priv
->netdev
->name
, out_dev
->name
);
3321 out_priv
= netdev_priv(out_dev
);
3322 rpriv
= out_priv
->ppriv
;
3323 attr
->dests
[attr
->out_count
].rep
= rpriv
->rep
;
3324 attr
->dests
[attr
->out_count
].mdev
= out_priv
->mdev
;
3326 } else if (parse_attr
->filter_dev
!= priv
->netdev
) {
3327 /* All mlx5 devices are called to configure
3328 * high level device filters. Therefore, the
3329 * *attempt* to install a filter on invalid
3330 * eswitch should not trigger an explicit error
3334 NL_SET_ERR_MSG_MOD(extack
,
3335 "devices are not on same switch HW, can't offload forwarding");
3336 pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
3337 priv
->netdev
->name
, out_dev
->name
);
3342 case FLOW_ACTION_TUNNEL_ENCAP
:
3350 case FLOW_ACTION_VLAN_PUSH
:
3351 case FLOW_ACTION_VLAN_POP
:
3352 if (act
->id
== FLOW_ACTION_VLAN_PUSH
&&
3353 (action
& MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
)) {
3354 /* Replace vlan pop+push with vlan modify */
3355 action
&= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
;
3356 err
= add_vlan_rewrite_action(priv
,
3357 MLX5_FLOW_NAMESPACE_FDB
,
3358 act
, parse_attr
, hdrs
,
3361 err
= parse_tc_vlan_action(priv
, act
, attr
, &action
);
3366 attr
->split_count
= attr
->out_count
;
3368 case FLOW_ACTION_VLAN_MANGLE
:
3369 err
= add_vlan_rewrite_action(priv
,
3370 MLX5_FLOW_NAMESPACE_FDB
,
3371 act
, parse_attr
, hdrs
,
3376 attr
->split_count
= attr
->out_count
;
3378 case FLOW_ACTION_TUNNEL_DECAP
:
3379 action
|= MLX5_FLOW_CONTEXT_ACTION_DECAP
;
3381 case FLOW_ACTION_GOTO
: {
3382 u32 dest_chain
= act
->chain_index
;
3383 u32 max_chain
= mlx5_eswitch_get_chain_range(esw
);
3385 if (dest_chain
<= attr
->chain
) {
3386 NL_SET_ERR_MSG(extack
, "Goto earlier chain isn't supported");
3389 if (dest_chain
> max_chain
) {
3390 NL_SET_ERR_MSG(extack
, "Requested destination chain is out of supported range");
3393 action
|= MLX5_FLOW_CONTEXT_ACTION_COUNT
;
3394 attr
->dest_chain
= dest_chain
;
3398 NL_SET_ERR_MSG_MOD(extack
, "The offload action is not supported");
3403 if (MLX5_CAP_GEN(esw
->dev
, prio_tag_required
) &&
3404 action
& MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
) {
3405 /* For prio tag mode, replace vlan pop with rewrite vlan prio
3408 action
&= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
;
3409 err
= add_vlan_prio_tag_rewrite_action(priv
, parse_attr
, hdrs
,
3415 if (hdrs
[TCA_PEDIT_KEY_EX_CMD_SET
].pedits
||
3416 hdrs
[TCA_PEDIT_KEY_EX_CMD_ADD
].pedits
) {
3417 err
= alloc_tc_pedit_action(priv
, MLX5_FLOW_NAMESPACE_FDB
,
3418 parse_attr
, hdrs
, &action
, extack
);
3421 /* in case all pedit actions are skipped, remove the MOD_HDR
3422 * flag. we might have set split_count either by pedit or
3423 * pop/push. if there is no pop/push either, reset it too.
3425 if (parse_attr
->num_mod_hdr_actions
== 0) {
3426 action
&= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
;
3427 kfree(parse_attr
->mod_hdr_actions
);
3428 if (!((action
& MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
) ||
3429 (action
& MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH
)))
3430 attr
->split_count
= 0;
3434 attr
->action
= action
;
3435 if (!actions_match_supported(priv
, flow_action
, parse_attr
, flow
, extack
))
3438 if (attr
->dest_chain
) {
3439 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
) {
3440 NL_SET_ERR_MSG(extack
, "Mirroring goto chain rules isn't supported");
3443 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
3446 if (attr
->split_count
> 0 && !mlx5_esw_has_fwd_fdb(priv
->mdev
)) {
3447 NL_SET_ERR_MSG_MOD(extack
,
3448 "current firmware doesn't support split rule for port mirroring");
3449 netdev_warn_once(priv
->netdev
, "current firmware doesn't support split rule for port mirroring\n");
3456 static void get_flags(int flags
, unsigned long *flow_flags
)
3458 unsigned long __flow_flags
= 0;
3460 if (flags
& MLX5_TC_FLAG(INGRESS
))
3461 __flow_flags
|= BIT(MLX5E_TC_FLOW_FLAG_INGRESS
);
3462 if (flags
& MLX5_TC_FLAG(EGRESS
))
3463 __flow_flags
|= BIT(MLX5E_TC_FLOW_FLAG_EGRESS
);
3465 if (flags
& MLX5_TC_FLAG(ESW_OFFLOAD
))
3466 __flow_flags
|= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH
);
3467 if (flags
& MLX5_TC_FLAG(NIC_OFFLOAD
))
3468 __flow_flags
|= BIT(MLX5E_TC_FLOW_FLAG_NIC
);
3470 *flow_flags
= __flow_flags
;
3473 static const struct rhashtable_params tc_ht_params
= {
3474 .head_offset
= offsetof(struct mlx5e_tc_flow
, node
),
3475 .key_offset
= offsetof(struct mlx5e_tc_flow
, cookie
),
3476 .key_len
= sizeof(((struct mlx5e_tc_flow
*)0)->cookie
),
3477 .automatic_shrinking
= true,
3480 static struct rhashtable
*get_tc_ht(struct mlx5e_priv
*priv
,
3481 unsigned long flags
)
3483 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
3484 struct mlx5e_rep_priv
*uplink_rpriv
;
3486 if (flags
& MLX5_TC_FLAG(ESW_OFFLOAD
)) {
3487 uplink_rpriv
= mlx5_eswitch_get_uplink_priv(esw
, REP_ETH
);
3488 return &uplink_rpriv
->uplink_priv
.tc_ht
;
3489 } else /* NIC offload */
3490 return &priv
->fs
.tc
.ht
;
/*
 * is_peer_flow_needed() - predicate over an eswitch flow (flow->esw_attr).
 *
 * Three facts are collected up front:
 *   is_rep_ingress - the flow has the INGRESS flag and its in_rep vport is
 *                    not MLX5_VPORT_UPLINK
 *   act_is_encap   - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT is set in
 *                    attr->action
 *   esw_paired     - mlx5_devcom_is_paired() result for in_mdev's devcom and
 *                    MLX5_DEVCOM_ESW_OFFLOADS
 *
 * The visible condition holds when in_mdev is in SR-IOV LAG or multipath LAG
 * mode and the flow is either rep-ingress or an encap flow.
 *
 * NOTE(review): the embedded line numbers skip 3502-3505 and everything after
 * 3508, so the use of esw_paired and the return statements are not visible
 * here - presumably an unpaired eswitch yields false; confirm against the
 * full source.
 */
3493 static bool is_peer_flow_needed(struct mlx5e_tc_flow
*flow
)
3495 struct mlx5_esw_flow_attr
*attr
= flow
->esw_attr
;
3496 bool is_rep_ingress
= attr
->in_rep
->vport
!= MLX5_VPORT_UPLINK
&&
3497 flow_flag_test(flow
, INGRESS
);
3498 bool act_is_encap
= !!(attr
->action
&
3499 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT
);
3500 bool esw_paired
= mlx5_devcom_is_paired(attr
->in_mdev
->priv
.devcom
,
3501 MLX5_DEVCOM_ESW_OFFLOADS
);
3506 if ((mlx5_lag_is_sriov(attr
->in_mdev
) ||
3507 mlx5_lag_is_multipath(attr
->in_mdev
)) &&
3508 (is_rep_ingress
|| act_is_encap
))
/*
 * mlx5e_alloc_flow() - allocate a TC flow plus its parse attributes and
 * pre-initialise the flow.
 *
 * @priv:         not referenced in the lines visible here
 * @attr_size:    extra bytes allocated directly behind struct mlx5e_tc_flow
 * @f:            offload request; only f->cookie is read
 * @flow_flags:   stored unchanged in flow->flags
 * @__parse_attr: out: receives the zeroed parse attributes
 * @__flow:       NOTE(review): never written in the visible lines; presumably
 *                receives the new flow on a hidden line - confirm
 *
 * The flow is obtained with kzalloc() and the parse attributes with
 * kvzalloc(), both GFP_KERNEL. The flow's encaps[] list heads (one per
 * MLX5_MAX_FLOW_FWD_VPORTS slot), mod_hdr and hairpin list heads are
 * initialised, the refcount starts at 1 and the init_done completion is
 * initialised.
 *
 * NOTE(review): the embedded line numbers skip (3514, 3522-3523, 3527-3530,
 * 3533, 3540-3541, 3543 onward), so the return type, some declarations, the
 * allocation-failure path and the return statements are not visible here.
 */
3515 mlx5e_alloc_flow(struct mlx5e_priv
*priv
, int attr_size
,
3516 struct flow_cls_offload
*f
, unsigned long flow_flags
,
3517 struct mlx5e_tc_flow_parse_attr
**__parse_attr
,
3518 struct mlx5e_tc_flow
**__flow
)
3520 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
3521 struct mlx5e_tc_flow
*flow
;
3524 flow
= kzalloc(sizeof(*flow
) + attr_size
, GFP_KERNEL
);
3525 parse_attr
= kvzalloc(sizeof(*parse_attr
), GFP_KERNEL
);
/* Both allocations are attempted before either result is checked. */
3526 if (!parse_attr
|| !flow
) {
3531 flow
->cookie
= f
->cookie
;
3532 flow
->flags
= flow_flags
;
3534 for (out_index
= 0; out_index
< MLX5_MAX_FLOW_FWD_VPORTS
; out_index
++)
3535 INIT_LIST_HEAD(&flow
->encaps
[out_index
].list
);
3536 INIT_LIST_HEAD(&flow
->mod_hdr
);
3537 INIT_LIST_HEAD(&flow
->hairpin
);
3538 refcount_set(&flow
->refcnt
, 1);
3539 init_completion(&flow
->init_done
);
3542 *__parse_attr
= parse_attr
;
3553 mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr
*esw_attr
,
3554 struct mlx5e_priv
*priv
,
3555 struct mlx5e_tc_flow_parse_attr
*parse_attr
,
3556 struct flow_cls_offload
*f
,
3557 struct mlx5_eswitch_rep
*in_rep
,
3558 struct mlx5_core_dev
*in_mdev
)
3560 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
3562 esw_attr
->parse_attr
= parse_attr
;
3563 esw_attr
->chain
= f
->common
.chain_index
;
3564 esw_attr
->prio
= f
->common
.prio
;
3566 esw_attr
->in_rep
= in_rep
;
3567 esw_attr
->in_mdev
= in_mdev
;
3569 if (MLX5_CAP_ESW(esw
->dev
, counter_eswitch_affinity
) ==
3570 MLX5_COUNTER_SOURCE_ESWITCH
)
3571 esw_attr
->counter_dev
= in_mdev
;
3573 esw_attr
->counter_dev
= priv
->mdev
;
/*
 * __mlx5e_add_fdb_flow() - build one eswitch (FDB) flow from a flower
 * offload request and try to install it.
 *
 * Visible sequence:
 *   1. BIT(MLX5E_TC_FLOW_FLAG_ESWITCH) is OR-ed into @flow_flags and the flow
 *      is allocated with room for a struct mlx5_esw_flow_attr.
 *   2. @filter_dev is recorded in the parse attributes and the eswitch
 *      attribute is initialised from @f, @in_rep and @in_mdev.
 *   3. parse_cls_flower(), parse_tc_fdb_actions() and mlx5e_tc_add_fdb_flow()
 *      run in that order, each storing its result in err.
 *   4. flow->init_done is completed right after the install attempt.
 *   5. An err of -ENETUNREACH while @in_mdev is in multipath LAG mode is
 *      tested for separately and add_unready_flow() appears on that path.
 *   6. The last visible lines drop a flow reference with mlx5e_flow_put() and
 *      return ERR_PTR(err).
 *
 * NOTE(review): the embedded line numbers skip between most statements, so
 * the err checks, goto/labels, the success return and some call arguments
 * (source lines 3599, 3603) are not visible here; the exact control flow must
 * be confirmed against the full source.
 */
3576 static struct mlx5e_tc_flow
*
3577 __mlx5e_add_fdb_flow(struct mlx5e_priv
*priv
,
3578 struct flow_cls_offload
*f
,
3579 unsigned long flow_flags
,
3580 struct net_device
*filter_dev
,
3581 struct mlx5_eswitch_rep
*in_rep
,
3582 struct mlx5_core_dev
*in_mdev
)
3584 struct flow_rule
*rule
= flow_cls_offload_flow_rule(f
);
3585 struct netlink_ext_ack
*extack
= f
->common
.extack
;
3586 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
3587 struct mlx5e_tc_flow
*flow
;
3590 flow_flags
|= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH
);
3591 attr_size
= sizeof(struct mlx5_esw_flow_attr
);
3592 err
= mlx5e_alloc_flow(priv
, attr_size
, f
, flow_flags
,
3593 &parse_attr
, &flow
);
3597 parse_attr
->filter_dev
= filter_dev
;
3598 mlx5e_flow_esw_attr_init(flow
->esw_attr
,
3600 f
, in_rep
, in_mdev
);
3602 err
= parse_cls_flower(flow
->priv
, flow
, &parse_attr
->spec
,
3607 err
= parse_tc_fdb_actions(priv
, &rule
->action
, flow
, extack
);
3611 err
= mlx5e_tc_add_fdb_flow(priv
, flow
, extack
);
/* Waiters on init_done are released whether or not the install succeeded. */
3612 complete_all(&flow
->init_done
);
3614 if (!(err
== -ENETUNREACH
&& mlx5_lag_is_multipath(in_mdev
)))
3617 add_unready_flow(flow
);
3623 mlx5e_flow_put(priv
, flow
);
3625 return ERR_PTR(err
);
/*
 * mlx5e_tc_add_fdb_peer_flow() - create a second copy of @flow through the
 * peer eswitch's uplink netdev.
 *
 * @f:          the original flower offload request, reused for the copy
 * @flow:       the flow that was already created for flow->priv
 * @flow_flags: flags handed to __mlx5e_add_fdb_flow() for the copy
 *
 * Visible sequence:
 *   - the peer eswitch is fetched with mlx5_devcom_get_peer_data() and its
 *     uplink representor's netdev private data becomes peer_priv;
 *   - in_mdev is peer_priv->mdev when the original flow's in_rep vport is
 *     MLX5_VPORT_UPLINK, otherwise priv->mdev;
 *   - __mlx5e_add_fdb_flow() is called with peer_priv, the original
 *     filter_dev and the original in_rep;
 *   - on the lines after the IS_ERR() check the new flow is stored in
 *     flow->peer_flow, the DUP flag is set on @flow and @flow is appended to
 *     esw->offloads.peer_flows under esw->offloads.peer_mutex;
 *   - the devcom peer data is released at the end.
 *
 * NOTE(review): the embedded line numbers skip, so the NULL check on the peer
 * eswitch (if any), the goto/labels and the return statements are not visible
 * here. The in-body comment below is also cut off in this listing.
 */
3628 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload
*f
,
3629 struct mlx5e_tc_flow
*flow
,
3630 unsigned long flow_flags
)
3632 struct mlx5e_priv
*priv
= flow
->priv
, *peer_priv
;
3633 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
, *peer_esw
;
3634 struct mlx5_devcom
*devcom
= priv
->mdev
->priv
.devcom
;
3635 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
3636 struct mlx5e_rep_priv
*peer_urpriv
;
3637 struct mlx5e_tc_flow
*peer_flow
;
3638 struct mlx5_core_dev
*in_mdev
;
3641 peer_esw
= mlx5_devcom_get_peer_data(devcom
, MLX5_DEVCOM_ESW_OFFLOADS
);
3645 peer_urpriv
= mlx5_eswitch_get_uplink_priv(peer_esw
, REP_ETH
);
3646 peer_priv
= netdev_priv(peer_urpriv
->netdev
);
3648 /* in_mdev is assigned of which the packet originated from.
3649 * So packets redirected to uplink use the same mdev of the
3650 * original flow and packets redirected from uplink use the
3653 if (flow
->esw_attr
->in_rep
->vport
== MLX5_VPORT_UPLINK
)
3654 in_mdev
= peer_priv
->mdev
;
3656 in_mdev
= priv
->mdev
;
3658 parse_attr
= flow
->esw_attr
->parse_attr
;
3659 peer_flow
= __mlx5e_add_fdb_flow(peer_priv
, f
, flow_flags
,
3660 parse_attr
->filter_dev
,
3661 flow
->esw_attr
->in_rep
, in_mdev
);
3662 if (IS_ERR(peer_flow
)) {
3663 err
= PTR_ERR(peer_flow
);
3667 flow
->peer_flow
= peer_flow
;
3668 flow_flag_set(flow
, DUP
);
3669 mutex_lock(&esw
->offloads
.peer_mutex
);
3670 list_add_tail(&flow
->peer
, &esw
->offloads
.peer_flows
);
3671 mutex_unlock(&esw
->offloads
.peer_mutex
);
3674 mlx5_devcom_release_peer_data(devcom
, MLX5_DEVCOM_ESW_OFFLOADS
);
/*
 * mlx5e_add_fdb_flow() - add an eswitch flow for @priv and, when required,
 * its peer copy.
 *
 * @priv:       representor netdev private data; priv->ppriv supplies the
 *              ingress representor (rpriv->rep), priv->mdev the ingress device
 * @f:          flower offload request
 * @flow_flags: flow flag bits passed on to the flow constructors
 * @filter_dev: device the filter was attached to, passed on unchanged
 * @__flow:     NOTE(review): not written in the visible lines; presumably the
 *              out parameter for the created flow - confirm
 *
 * The flow is created with __mlx5e_add_fdb_flow(). If
 * is_peer_flow_needed() says so, mlx5e_tc_add_fdb_peer_flow() is called as
 * well; mlx5e_tc_del_fdb_flow() on the first flow is visible right after that
 * call.
 *
 * NOTE(review): the embedded line numbers skip, so the IS_ERR() test guarding
 * the PTR_ERR() return, the condition around mlx5e_tc_del_fdb_flow()
 * (presumably peer-flow failure), the labels and the final returns are not
 * visible here.
 */
3679 mlx5e_add_fdb_flow(struct mlx5e_priv
*priv
,
3680 struct flow_cls_offload
*f
,
3681 unsigned long flow_flags
,
3682 struct net_device
*filter_dev
,
3683 struct mlx5e_tc_flow
**__flow
)
3685 struct mlx5e_rep_priv
*rpriv
= priv
->ppriv
;
3686 struct mlx5_eswitch_rep
*in_rep
= rpriv
->rep
;
3687 struct mlx5_core_dev
*in_mdev
= priv
->mdev
;
3688 struct mlx5e_tc_flow
*flow
;
3691 flow
= __mlx5e_add_fdb_flow(priv
, f
, flow_flags
, filter_dev
, in_rep
,
3694 return PTR_ERR(flow
);
3696 if (is_peer_flow_needed(flow
)) {
3697 err
= mlx5e_tc_add_fdb_peer_flow(f
, flow
, flow_flags
);
3699 mlx5e_tc_del_fdb_flow(priv
, flow
);
/*
 * mlx5e_add_nic_flow() - build a NIC (non-eswitch) flow from a flower
 * offload request and install it.
 *
 * @priv:       netdev private data
 * @f:          flower offload request
 * @flow_flags: flow flag bits; BIT(MLX5E_TC_FLOW_FLAG_NIC) is OR-ed in here
 * @filter_dev: device the filter was attached to, recorded in the parse
 *              attributes
 * @__flow:     NOTE(review): not written in the visible lines; presumably the
 *              out parameter for the created flow - confirm
 *
 * Visible sequence: tc_cls_can_offload_and_chain0() gate, allocation with
 * room for a struct mlx5_nic_flow_attr, parse_cls_flower(),
 * parse_tc_nic_actions(), mlx5e_tc_add_nic_flow(), then the OFFLOADED flag is
 * set on the flow. mlx5e_flow_put() is the last visible call.
 *
 * NOTE(review): the embedded line numbers skip, so the error code returned by
 * the chain-0 gate, the err checks, the labels and the returns are not
 * visible here; mlx5e_flow_put() presumably belongs to the error path.
 */
3713 mlx5e_add_nic_flow(struct mlx5e_priv
*priv
,
3714 struct flow_cls_offload
*f
,
3715 unsigned long flow_flags
,
3716 struct net_device
*filter_dev
,
3717 struct mlx5e_tc_flow
**__flow
)
3719 struct flow_rule
*rule
= flow_cls_offload_flow_rule(f
);
3720 struct netlink_ext_ack
*extack
= f
->common
.extack
;
3721 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
3722 struct mlx5e_tc_flow
*flow
;
3725 /* multi-chain not supported for NIC rules */
3726 if (!tc_cls_can_offload_and_chain0(priv
->netdev
, &f
->common
))
3729 flow_flags
|= BIT(MLX5E_TC_FLOW_FLAG_NIC
);
3730 attr_size
= sizeof(struct mlx5_nic_flow_attr
);
3731 err
= mlx5e_alloc_flow(priv
, attr_size
, f
, flow_flags
,
3732 &parse_attr
, &flow
);
3736 parse_attr
->filter_dev
= filter_dev
;
3737 err
= parse_cls_flower(flow
->priv
, flow
, &parse_attr
->spec
,
3742 err
= parse_tc_nic_actions(priv
, &rule
->action
, parse_attr
, flow
, extack
);
3746 err
= mlx5e_tc_add_nic_flow(priv
, parse_attr
, flow
, extack
);
3750 flow_flag_set(flow
, OFFLOADED
);
3757 mlx5e_flow_put(priv
, flow
);
/*
 * mlx5e_tc_add_flow() - create a flow for a flower request, choosing between
 * the eswitch (FDB) and the NIC implementation.
 *
 * @priv:       netdev private data
 * @f:          flower offload request
 * @flags:      MLX5_TC_FLAG() bitmask, converted with get_flags()
 * @filter_dev: device the filter was attached to, passed on unchanged
 * @flow:       presumably the out parameter handed to the chosen helper (the
 *              argument lines are not visible here) - confirm
 *
 * tc_can_offload_extack() is checked first. mlx5e_add_fdb_flow() is used when
 * an eswitch exists and its mode is MLX5_ESWITCH_OFFLOADS;
 * mlx5e_add_nic_flow() appears as the alternative.
 *
 * NOTE(review): the embedded line numbers skip, so the error code of the
 * offload gate, the `else` between the two helpers, the trailing call
 * arguments and the return are not visible here.
 */
3764 mlx5e_tc_add_flow(struct mlx5e_priv
*priv
,
3765 struct flow_cls_offload
*f
,
3766 unsigned long flags
,
3767 struct net_device
*filter_dev
,
3768 struct mlx5e_tc_flow
**flow
)
3770 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
3771 unsigned long flow_flags
;
3774 get_flags(flags
, &flow_flags
);
3776 if (!tc_can_offload_extack(priv
->netdev
, f
->common
.extack
))
3779 if (esw
&& esw
->mode
== MLX5_ESWITCH_OFFLOADS
)
3780 err
= mlx5e_add_fdb_flow(priv
, f
, flow_flags
,
3783 err
= mlx5e_add_nic_flow(priv
, f
, flow_flags
,
/*
 * mlx5e_configure_flower() - handle a flower "add filter" offload request.
 *
 * @dev:   device the filter is attached to; passed to mlx5e_tc_add_flow() as
 *         the filter device
 * @priv:  netdev private data
 * @f:     flower offload request; f->cookie is the hash-table key
 * @flags: MLX5_TC_FLAG() bitmask; also selects the hash table via get_tc_ht()
 *
 * Visible sequence:
 *   - the cookie is looked up with rhashtable_lookup(); the "flow cookie
 *     already exists, ignoring" extack message and one-time netdev warning
 *     belong to that lookup;
 *   - the request is traced and the flow is created with mlx5e_tc_add_flow();
 *   - the new flow is inserted with rhashtable_lookup_insert_fast();
 *   - mlx5e_flow_put() is the last visible call.
 *
 * NOTE(review): the embedded line numbers skip, so the RCU locking around the
 * lookup (if any), the `if (flow)` test, error codes, labels and returns are
 * not visible here; mlx5e_flow_put() presumably undoes the flow when the
 * insert fails - confirm.
 */
3789 int mlx5e_configure_flower(struct net_device
*dev
, struct mlx5e_priv
*priv
,
3790 struct flow_cls_offload
*f
, unsigned long flags
)
3792 struct netlink_ext_ack
*extack
= f
->common
.extack
;
3793 struct rhashtable
*tc_ht
= get_tc_ht(priv
, flags
);
3794 struct mlx5e_tc_flow
*flow
;
3798 flow
= rhashtable_lookup(tc_ht
, &f
->cookie
, tc_ht_params
);
3801 NL_SET_ERR_MSG_MOD(extack
,
3802 "flow cookie already exists, ignoring");
3803 netdev_warn_once(priv
->netdev
,
3804 "flow cookie %lx already exists, ignoring\n",
3810 trace_mlx5e_configure_flower(f
);
3811 err
= mlx5e_tc_add_flow(priv
, f
, flags
, dev
, &flow
);
3815 err
= rhashtable_lookup_insert_fast(tc_ht
, &flow
->node
, tc_ht_params
);
3822 mlx5e_flow_put(priv
, flow
);
3827 static bool same_flow_direction(struct mlx5e_tc_flow
*flow
, int flags
)
3829 bool dir_ingress
= !!(flags
& MLX5_TC_FLAG(INGRESS
));
3830 bool dir_egress
= !!(flags
& MLX5_TC_FLAG(EGRESS
));
3832 return flow_flag_test(flow
, INGRESS
) == dir_ingress
&&
3833 flow_flag_test(flow
, EGRESS
) == dir_egress
;
/*
 * mlx5e_delete_flower() - handle a flower "delete filter" offload request.
 *
 * @dev:   not referenced in the lines visible here
 * @priv:  netdev private data
 * @f:     flower offload request; f->cookie is the hash-table key
 * @flags: MLX5_TC_FLAG() bitmask; selects the hash table and the expected
 *         flow direction
 *
 * Visible sequence:
 *   - the flow is looked up with rhashtable_lookup_fast(); a missing flow or
 *     one whose direction differs (same_flow_direction()) takes the first
 *     branch;
 *   - flow_flag_test_and_set(flow, DELETED) guards the removal: the branch is
 *     taken when the flag was already set;
 *   - otherwise the flow is removed from the hash table, the request is
 *     traced and a reference is dropped with mlx5e_flow_put().
 *
 * NOTE(review): the embedded line numbers skip, so the error codes, any
 * locking, the labels and the returns are not visible here. The in-body
 * comment below is also cut off in this listing.
 */
3836 int mlx5e_delete_flower(struct net_device
*dev
, struct mlx5e_priv
*priv
,
3837 struct flow_cls_offload
*f
, unsigned long flags
)
3839 struct rhashtable
*tc_ht
= get_tc_ht(priv
, flags
);
3840 struct mlx5e_tc_flow
*flow
;
3844 flow
= rhashtable_lookup_fast(tc_ht
, &f
->cookie
, tc_ht_params
);
3845 if (!flow
|| !same_flow_direction(flow
, flags
)) {
3850 /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
3853 if (flow_flag_test_and_set(flow
, DELETED
)) {
3857 rhashtable_remove_fast(tc_ht
, &flow
->node
, tc_ht_params
);
3860 trace_mlx5e_delete_flower(f
);
3861 mlx5e_flow_put(priv
, flow
);
/*
 * mlx5e_stats_flower() - handle a flower "get stats" offload request.
 *
 * @dev:   not referenced in the lines visible here
 * @priv:  netdev private data
 * @f:     flower offload request; f->cookie is the key, f->stats the output
 * @flags: MLX5_TC_FLAG() bitmask; selects the hash table and the expected
 *         flow direction
 *
 * Visible sequence:
 *   - the flow is looked up with rhashtable_lookup() and a reference is taken
 *     through mlx5e_flow_get(); PTR_ERR(flow) is returned on one path;
 *   - a flow of the wrong direction takes a separate branch;
 *   - for an offloaded flow the cached counter values are read into
 *     bytes, packets and lastuse;
 *   - the peer eswitch is fetched through devcom; when the flow has the DUP
 *     flag and its peer_flow has the OFFLOADED flag, the peer counter is read
 *     too, packets is increased by the peer's value and lastuse becomes the
 *     larger of the two;
 *   - devcom peer data is released, the totals are reported with
 *     flow_stats_update(&f->stats, bytes, packets, lastuse), the request is
 *     traced and the flow reference is dropped.
 *
 * NOTE(review): the embedded line numbers skip, so the declarations of the
 * counters' local variables, RCU locking (if any), NULL checks, the hidden
 * source line 3921 (presumably the bytes accumulation), labels other than the
 * visible `goto no_peer_counter` and the returns are not visible here. The
 * in-body comment below is also cut off in this listing.
 */
3870 int mlx5e_stats_flower(struct net_device
*dev
, struct mlx5e_priv
*priv
,
3871 struct flow_cls_offload
*f
, unsigned long flags
)
3873 struct mlx5_devcom
*devcom
= priv
->mdev
->priv
.devcom
;
3874 struct rhashtable
*tc_ht
= get_tc_ht(priv
, flags
);
3875 struct mlx5_eswitch
*peer_esw
;
3876 struct mlx5e_tc_flow
*flow
;
3877 struct mlx5_fc
*counter
;
3884 flow
= mlx5e_flow_get(rhashtable_lookup(tc_ht
, &f
->cookie
,
3888 return PTR_ERR(flow
);
3890 if (!same_flow_direction(flow
, flags
)) {
3895 if (mlx5e_is_offloaded_flow(flow
)) {
3896 counter
= mlx5e_tc_get_counter(flow
);
3900 mlx5_fc_query_cached(counter
, &bytes
, &packets
, &lastuse
);
3903 /* Under multipath it's possible for one rule to be currently
3904 * un-offloaded while the other rule is offloaded.
3906 peer_esw
= mlx5_devcom_get_peer_data(devcom
, MLX5_DEVCOM_ESW_OFFLOADS
);
3910 if (flow_flag_test(flow
, DUP
) &&
3911 flow_flag_test(flow
->peer_flow
, OFFLOADED
)) {
3916 counter
= mlx5e_tc_get_counter(flow
->peer_flow
);
3918 goto no_peer_counter
;
3919 mlx5_fc_query_cached(counter
, &bytes2
, &packets2
, &lastuse2
);
3922 packets
+= packets2
;
3923 lastuse
= max_t(u64
, lastuse
, lastuse2
);
3927 mlx5_devcom_release_peer_data(devcom
, MLX5_DEVCOM_ESW_OFFLOADS
);
3929 flow_stats_update(&f
->stats
, bytes
, packets
, lastuse
);
3930 trace_mlx5e_stats_flower(f
);
3932 mlx5e_flow_put(priv
, flow
);
3936 static int apply_police_params(struct mlx5e_priv
*priv
, u32 rate
,
3937 struct netlink_ext_ack
*extack
)
3939 struct mlx5e_rep_priv
*rpriv
= priv
->ppriv
;
3940 struct mlx5_eswitch
*esw
;
3945 esw
= priv
->mdev
->priv
.eswitch
;
3946 /* rate is given in bytes/sec.
3947 * First convert to bits/sec and then round to the nearest mbit/secs.
3948 * mbit means million bits.
3949 * Moreover, if rate is non zero we choose to configure to a minimum of
3952 rate_mbps
= rate
? max_t(u32
, (rate
* 8 + 500000) / 1000000, 1) : 0;
3953 vport_num
= rpriv
->rep
->vport
;
3955 err
= mlx5_esw_modify_vport_rate(esw
, vport_num
, rate_mbps
);
3957 NL_SET_ERR_MSG_MOD(extack
, "failed applying action to hardware");
/*
 * scan_tc_matchall_fdb_actions() - validate and apply the action list of a
 * matchall offload request.
 *
 * @priv:        representor netdev private data (priv->ppriv is the rep priv)
 * @flow_action: actions attached to the matchall rule
 * @extack:      netlink extended ack for error reporting
 *
 * Two checks come first, each with its own extack message: the list must not
 * be empty, and it must hold exactly one action. The list is then walked; for
 * FLOW_ACTION_POLICE the rate act->police.rate_bytes_ps is handed to
 * apply_police_params() and the current priv->stats.vf_vport counters are
 * saved in rpriv->prev_vf_vport_stats. The "mlx5 supports only police action
 * for matchall" message follows the police case.
 *
 * NOTE(review): the embedded line numbers skip, so the local declarations,
 * the switch statement itself, the err check after apply_police_params(),
 * the error codes and the returns are not visible here.
 */
3962 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv
*priv
,
3963 struct flow_action
*flow_action
,
3964 struct netlink_ext_ack
*extack
)
3966 struct mlx5e_rep_priv
*rpriv
= priv
->ppriv
;
3967 const struct flow_action_entry
*act
;
3971 if (!flow_action_has_entries(flow_action
)) {
3972 NL_SET_ERR_MSG_MOD(extack
, "matchall called with no action");
3976 if (!flow_offload_has_one_action(flow_action
)) {
3977 NL_SET_ERR_MSG_MOD(extack
, "matchall policing support only a single action");
3981 flow_action_for_each(i
, act
, flow_action
) {
3983 case FLOW_ACTION_POLICE
:
3984 err
= apply_police_params(priv
, act
->police
.rate_bytes_ps
, extack
);
/* Snapshot the vport counters as the new baseline for later stats deltas. */
3988 rpriv
->prev_vf_vport_stats
= priv
->stats
.vf_vport
;
3991 NL_SET_ERR_MSG_MOD(extack
, "mlx5 supports only police action for matchall");
/*
 * mlx5e_tc_configure_matchall() - handle a matchall "add" offload request.
 *
 * @priv: representor netdev private data
 * @ma:   matchall offload request
 *
 * Requests whose ma->common.prio is not 1 get the "only priority 1 is
 * supported" extack message. Otherwise the rule's actions are processed by
 * scan_tc_matchall_fdb_actions() and its result is returned.
 *
 * NOTE(review): the embedded line numbers skip 4006-4008, so the error code
 * returned for an unsupported priority is not visible here.
 */
3999 int mlx5e_tc_configure_matchall(struct mlx5e_priv
*priv
,
4000 struct tc_cls_matchall_offload
*ma
)
4002 struct netlink_ext_ack
*extack
= ma
->common
.extack
;
4004 if (ma
->common
.prio
!= 1) {
4005 NL_SET_ERR_MSG_MOD(extack
, "only priority 1 is supported");
4009 return scan_tc_matchall_fdb_actions(priv
, &ma
->rule
->action
, extack
);
4012 int mlx5e_tc_delete_matchall(struct mlx5e_priv
*priv
,
4013 struct tc_cls_matchall_offload
*ma
)
4015 struct netlink_ext_ack
*extack
= ma
->common
.extack
;
4017 return apply_police_params(priv
, 0, extack
);
4020 void mlx5e_tc_stats_matchall(struct mlx5e_priv
*priv
,
4021 struct tc_cls_matchall_offload
*ma
)
4023 struct mlx5e_rep_priv
*rpriv
= priv
->ppriv
;
4024 struct rtnl_link_stats64 cur_stats
;
4028 cur_stats
= priv
->stats
.vf_vport
;
4029 dpkts
= cur_stats
.rx_packets
- rpriv
->prev_vf_vport_stats
.rx_packets
;
4030 dbytes
= cur_stats
.rx_bytes
- rpriv
->prev_vf_vport_stats
.rx_bytes
;
4031 rpriv
->prev_vf_vport_stats
= cur_stats
;
4032 flow_stats_update(&ma
->stats
, dpkts
, dbytes
, jiffies
);
/*
 * mlx5e_tc_hairpin_update_dead_peer() - mark hairpin pairs whose peer device
 * is @peer_priv's device as having lost their peer.
 *
 * @priv:      netdev whose hairpin table is scanned
 * @peer_priv: netdev whose device (peer_priv->mdev) is compared against
 *
 * Nothing is marked unless same_hw_devs(priv, peer_priv) holds. The peer's
 * vhca_id capability is read once. Under priv->fs.tc.hairpin_tbl_lock every
 * hairpin entry whose refcount can still be raised is referenced and queued
 * on a local list; the lock is dropped before any waiting happens. For each
 * queued entry the function waits for hpe->res_ready, sets
 * hpe->hp->pair->peer_gone when hpe->hp is a valid pointer and the entry's
 * peer_vhca_id matches, and then drops the reference with
 * mlx5e_hairpin_put().
 *
 * NOTE(review): the embedded line numbers skip 4041-4043 and 4045, so the
 * declarations of peer_vhca_id and bkt and the statement executed when the
 * devices differ (presumably a plain return) are not visible here.
 */
4035 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv
*priv
,
4036 struct mlx5e_priv
*peer_priv
)
4038 struct mlx5_core_dev
*peer_mdev
= peer_priv
->mdev
;
4039 struct mlx5e_hairpin_entry
*hpe
, *tmp
;
4040 LIST_HEAD(init_wait_list
);
4044 if (!same_hw_devs(priv
, peer_priv
))
4047 peer_vhca_id
= MLX5_CAP_GEN(peer_mdev
, vhca_id
);
/* Collect referenced entries under the lock; wait on them after unlocking. */
4049 mutex_lock(&priv
->fs
.tc
.hairpin_tbl_lock
);
4050 hash_for_each(priv
->fs
.tc
.hairpin_tbl
, bkt
, hpe
, hairpin_hlist
)
4051 if (refcount_inc_not_zero(&hpe
->refcnt
))
4052 list_add(&hpe
->dead_peer_wait_list
, &init_wait_list
);
4053 mutex_unlock(&priv
->fs
.tc
.hairpin_tbl_lock
);
4055 list_for_each_entry_safe(hpe
, tmp
, &init_wait_list
, dead_peer_wait_list
) {
4056 wait_for_completion(&hpe
->res_ready
);
4057 if (!IS_ERR_OR_NULL(hpe
->hp
) && hpe
->peer_vhca_id
== peer_vhca_id
)
4058 hpe
->hp
->pair
->peer_gone
= true;
4060 mlx5e_hairpin_put(priv
, hpe
);
/*
 * mlx5e_tc_netdev_event() - netdevice notifier callback of the TC table.
 *
 * @this:  the notifier block embedded in struct mlx5e_tc_table
 *         (netdevice_nb); the owning mlx5e_priv is recovered from it through
 *         three container_of() steps (tc table -> flow steering -> priv)
 * @event: netdevice event code
 * @ptr:   notifier info, converted with netdev_notifier_info_to_dev()
 *
 * The first condition filters out events that are not about an mlx5e netdev
 * (netdev_ops differs from mlx5e_netdev_ops), are not NETDEV_UNREGISTER, or
 * concern a device still in NETREG_REGISTERED state. The second condition
 * filters out the owner's own netdev and owners without NETIF_F_HW_TC. What
 * remains is passed to mlx5e_tc_hairpin_update_dead_peer().
 *
 * NOTE(review): the embedded line numbers skip after each condition and at
 * the end, so the statements taken by the filters and the return values are
 * not visible here.
 */
4064 static int mlx5e_tc_netdev_event(struct notifier_block
*this,
4065 unsigned long event
, void *ptr
)
4067 struct net_device
*ndev
= netdev_notifier_info_to_dev(ptr
);
4068 struct mlx5e_flow_steering
*fs
;
4069 struct mlx5e_priv
*peer_priv
;
4070 struct mlx5e_tc_table
*tc
;
4071 struct mlx5e_priv
*priv
;
4073 if (ndev
->netdev_ops
!= &mlx5e_netdev_ops
||
4074 event
!= NETDEV_UNREGISTER
||
4075 ndev
->reg_state
== NETREG_REGISTERED
)
4078 tc
= container_of(this, struct mlx5e_tc_table
, netdevice_nb
);
4079 fs
= container_of(tc
, struct mlx5e_flow_steering
, tc
);
4080 priv
= container_of(fs
, struct mlx5e_priv
, fs
);
4081 peer_priv
= netdev_priv(ndev
);
4082 if (priv
== peer_priv
||
4083 !(priv
->netdev
->features
& NETIF_F_HW_TC
))
4086 mlx5e_tc_hairpin_update_dead_peer(priv
, peer_priv
);
/*
 * mlx5e_tc_nic_init() - set up the NIC TC offload state in priv->fs.tc.
 *
 * @priv: netdev private data
 *
 * Initialises the t_lock, mod_hdr.lock and hairpin_tbl_lock mutexes, the
 * mod_hdr and hairpin hash lists and the flow rhashtable (tc_ht_params), then
 * registers mlx5e_tc_netdev_event() as a netdevice notifier. If registration
 * fails the notifier_call pointer is reset to NULL and a warning is logged;
 * mlx5e_tc_nic_cleanup() tests that pointer before unregistering.
 *
 * NOTE(review): the embedded line numbers skip 4094-4095, 4103-4105 and
 * everything after 4109, so the handling of a rhashtable_init() failure and
 * the return statements are not visible here.
 */
4091 int mlx5e_tc_nic_init(struct mlx5e_priv
*priv
)
4093 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
4096 mutex_init(&tc
->t_lock
);
4097 mutex_init(&tc
->mod_hdr
.lock
);
4098 hash_init(tc
->mod_hdr
.hlist
);
4099 mutex_init(&tc
->hairpin_tbl_lock
);
4100 hash_init(tc
->hairpin_tbl
);
4102 err
= rhashtable_init(&tc
->ht
, &tc_ht_params
);
4106 tc
->netdevice_nb
.notifier_call
= mlx5e_tc_netdev_event
;
4107 if (register_netdevice_notifier(&tc
->netdevice_nb
)) {
/* A NULL notifier_call records that the notifier was never registered. */
4108 tc
->netdevice_nb
.notifier_call
= NULL
;
4109 mlx5_core_warn(priv
->mdev
, "Failed to register netdev notifier\n");
/*
 * _mlx5e_tc_del_flow() - per-element callback used when a flow hash table is
 * torn down (passed to rhashtable_free_and_destroy() by
 * mlx5e_tc_esw_cleanup()).
 *
 * @ptr: the hash table element, a struct mlx5e_tc_flow
 * @arg: callback argument; not referenced in the lines visible here
 *
 * The flow is deleted with mlx5e_tc_del_flow() on behalf of flow->priv.
 *
 * NOTE(review): the embedded line numbers skip 4121-4123, which leaves room
 * for one more statement after the visible call (presumably freeing the flow)
 * - confirm against the full source.
 */
4115 static void _mlx5e_tc_del_flow(void *ptr
, void *arg
)
4117 struct mlx5e_tc_flow
*flow
= ptr
;
4118 struct mlx5e_priv
*priv
= flow
->priv
;
4120 mlx5e_tc_del_flow(priv
, flow
);
/*
 * mlx5e_tc_nic_cleanup() - tear down the NIC TC offload state in priv->fs.tc.
 *
 * @priv: netdev private data
 *
 * The netdevice notifier is unregistered only when notifier_call is non-NULL
 * (mlx5e_tc_nic_init() clears it when registration failed). The mod_hdr and
 * hairpin table mutexes are destroyed, the flow rhashtable is destroyed with
 * rhashtable_destroy(), the flow table tc->t is destroyed when it is a valid
 * pointer, and finally t_lock is destroyed.
 *
 * NOTE(review): the embedded line numbers skip 4138-4139, so a statement may
 * follow mlx5_destroy_flow_table() inside the branch (presumably resetting
 * tc->t) - confirm against the full source.
 */
4124 void mlx5e_tc_nic_cleanup(struct mlx5e_priv
*priv
)
4126 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
4128 if (tc
->netdevice_nb
.notifier_call
)
4129 unregister_netdevice_notifier(&tc
->netdevice_nb
);
4131 mutex_destroy(&tc
->mod_hdr
.lock
);
4132 mutex_destroy(&tc
->hairpin_tbl_lock
);
4134 rhashtable_destroy(&tc
->ht
);
4136 if (!IS_ERR_OR_NULL(tc
->t
)) {
4137 mlx5_destroy_flow_table(tc
->t
);
4140 mutex_destroy(&tc
->t_lock
);
4143 int mlx5e_tc_esw_init(struct rhashtable
*tc_ht
)
4145 return rhashtable_init(tc_ht
, &tc_ht_params
);
4148 void mlx5e_tc_esw_cleanup(struct rhashtable
*tc_ht
)
4150 rhashtable_free_and_destroy(tc_ht
, _mlx5e_tc_del_flow
, NULL
);
4153 int mlx5e_tc_num_filters(struct mlx5e_priv
*priv
, unsigned long flags
)
4155 struct rhashtable
*tc_ht
= get_tc_ht(priv
, flags
);
4157 return atomic_read(&tc_ht
->nelems
);
4160 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch
*esw
)
4162 struct mlx5e_tc_flow
*flow
, *tmp
;
4164 list_for_each_entry_safe(flow
, tmp
, &esw
->offloads
.peer_flows
, peer
)
4165 __mlx5e_tc_del_fdb_peer_flow(flow
);
/*
 * mlx5e_tc_reoffload_flows_work() - work handler that retries installing
 * flows parked on the unready list.
 *
 * @work: the reoffload_flows_work member of a struct mlx5_rep_uplink_priv
 *
 * Under rpriv->unready_flows_lock every flow on rpriv->unready_flows is
 * passed to mlx5e_tc_add_fdb_flow() (with a NULL extack); a flow whose
 * install returns 0 is taken off the list with unready_flow_del(). Flows that
 * still fail stay on the list.
 *
 * NOTE(review): this listing ends at the mutex_unlock() line, so the end of
 * the function is not visible here.
 */
4168 void mlx5e_tc_reoffload_flows_work(struct work_struct
*work
)
4170 struct mlx5_rep_uplink_priv
*rpriv
=
4171 container_of(work
, struct mlx5_rep_uplink_priv
,
4172 reoffload_flows_work
);
4173 struct mlx5e_tc_flow
*flow
, *tmp
;
4175 mutex_lock(&rpriv
->unready_flows_lock
);
4176 list_for_each_entry_safe(flow
, tmp
, &rpriv
->unready_flows
, unready
) {
4177 if (!mlx5e_tc_add_fdb_flow(flow
->priv
, flow
, NULL
))
4178 unready_flow_del(flow
);
4180 mutex_unlock(&rpriv
->unready_flows_lock
);