/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <net/flow_dissector.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
#include <net/ipv6_stubs.h>
#include "en/tc_tun.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "diag/en_tc_tracepoint.h"
struct mlx5_nic_flow_attr {
	u32 action;
	u32 flow_tag;
	struct mlx5_modify_hdr *modify_hdr;
	u32 hairpin_tirn;
	u8 match_level;
	struct mlx5_flow_table	*hairpin_ft;
	struct mlx5_fc		*counter;
};
#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)

enum {
	MLX5E_TC_FLOW_FLAG_INGRESS	= MLX5E_TC_FLAG_INGRESS_BIT,
	MLX5E_TC_FLOW_FLAG_EGRESS	= MLX5E_TC_FLAG_EGRESS_BIT,
	MLX5E_TC_FLOW_FLAG_ESWITCH	= MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
	MLX5E_TC_FLOW_FLAG_NIC		= MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
	MLX5E_TC_FLOW_FLAG_OFFLOADED	= MLX5E_TC_FLOW_BASE,
	MLX5E_TC_FLOW_FLAG_HAIRPIN	= MLX5E_TC_FLOW_BASE + 1,
	MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS	= MLX5E_TC_FLOW_BASE + 2,
	MLX5E_TC_FLOW_FLAG_SLOW		= MLX5E_TC_FLOW_BASE + 3,
	MLX5E_TC_FLOW_FLAG_DUP		= MLX5E_TC_FLOW_BASE + 4,
	MLX5E_TC_FLOW_FLAG_NOT_READY	= MLX5E_TC_FLOW_BASE + 5,
	MLX5E_TC_FLOW_FLAG_DELETED	= MLX5E_TC_FLOW_BASE + 6,
};

#define MLX5E_TC_MAX_SPLITS 1
/* Helper struct for accessing a struct containing list_head array.
 * To access the containing struct from one of the list_head items:
 * 1. Get the helper item from the list_head item using
 *    helper item =
 *            container_of(list_head item, helper struct type, list_head field)
 * 2. Get the containing struct from the helper item and its index in the array:
 *    containing struct =
 *            container_of(helper item, containing struct type, helper field[index])
 */
struct encap_flow_item {
	struct mlx5e_encap_entry *e; /* attached encap instance */
	struct list_head list;
	int index;
};
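
/* Illustrative sketch (not part of the driver): recovering the flow from a
 * list_head embedded in encaps[]. The index member records the helper item's
 * position in the array, which enables the second container_of() step:
 *
 *	struct encap_flow_item *efi;
 *	struct mlx5e_tc_flow *flow;
 *
 *	efi = container_of(lh, struct encap_flow_item, list);
 *	flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
 *
 * mlx5e_take_all_encap_flows() below uses exactly this two-step lookup.
 */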
struct mlx5e_tc_flow {
	struct rhash_head	node;
	struct mlx5e_priv	*priv;
	unsigned long		flags;
	struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
	/* Flow can be associated with multiple encap IDs.
	 * The number of encaps is bounded by the number of supported
	 * destinations.
	 */
	struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
	struct mlx5e_tc_flow	*peer_flow;
	struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */
	struct list_head	mod_hdr; /* flows sharing the same mod hdr ID */
	struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
	struct list_head	hairpin; /* flows sharing the same hairpin */
	struct list_head	peer;    /* flows with peer flow */
	struct list_head	unready; /* flows not ready to be offloaded (e.g. due to missing route) */
	int			tmp_efi_index;
	struct list_head	tmp_list; /* temporary flow list used by neigh update */
	refcount_t		refcnt;
	struct rcu_head		rcu_head;
	struct completion	init_done;
	union {
		struct mlx5_esw_flow_attr esw_attr[0];
		struct mlx5_nic_flow_attr nic_attr[0];
	};
};
struct mlx5e_tc_flow_parse_attr {
	const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
	struct net_device *filter_dev;
	struct mlx5_flow_spec spec;
	int num_mod_hdr_actions;
	int max_mod_hdr_actions;
	void *mod_hdr_actions;
	int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
};
#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
struct mlx5e_hairpin {
	struct mlx5_hairpin *pair;

	struct mlx5_core_dev *func_mdev;
	struct mlx5e_priv *func_priv;
	u32 tdn;
	u32 tirn;

	int num_channels;
	struct mlx5e_rqt indir_rqt;
	u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
	struct mlx5e_ttc_table ttc;
};
struct mlx5e_hairpin_entry {
	/* a node of a hash table which keeps all the  hairpin entries */
	struct hlist_node hairpin_hlist;

	/* protects flows list */
	spinlock_t flows_lock;
	/* flows sharing the same hairpin */
	struct list_head flows;
	/* hpe's that were not fully initialized when dead peer update event
	 * function traversed them.
	 */
	struct list_head dead_peer_wait_list;

	u16 peer_vhca_id;
	u8 prio;
	struct mlx5e_hairpin *hp;
	refcount_t refcnt;
	struct completion res_ready;
};
struct mod_hdr_key {
	int num_actions;
	void *actions;
};

struct mlx5e_mod_hdr_entry {
	/* a node of a hash table which keeps all the mod_hdr entries */
	struct hlist_node mod_hdr_hlist;

	/* protects flows list */
	spinlock_t flows_lock;
	/* flows sharing the same mod_hdr entry */
	struct list_head flows;

	struct mod_hdr_key key;

	struct mlx5_modify_hdr *modify_hdr;

	refcount_t refcnt;
	struct completion res_ready;
	int compl_result;
};
#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow);

static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
{
	if (!flow || !refcount_inc_not_zero(&flow->refcnt))
		return ERR_PTR(-EINVAL);
	return flow;
}
static void mlx5e_flow_put(struct mlx5e_priv *priv,
			   struct mlx5e_tc_flow *flow)
{
	if (refcount_dec_and_test(&flow->refcnt)) {
		mlx5e_tc_del_flow(priv, flow);
		kfree_rcu(flow, rcu_head);
	}
}
static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
{
	/* Complete all memory stores before setting bit. */
	smp_mb__before_atomic();
	set_bit(flag, &flow->flags);
}

#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)

static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
				     unsigned long flag)
{
	/* test_and_set_bit() provides all necessary barriers */
	return test_and_set_bit(flag, &flow->flags);
}

#define flow_flag_test_and_set(flow, flag)			\
	__flow_flag_test_and_set(flow,				\
				 MLX5E_TC_FLOW_FLAG_##flag)

static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
{
	/* Complete all memory stores before clearing bit. */
	smp_mb__before_atomic();
	clear_bit(flag, &flow->flags);
}

#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
						      MLX5E_TC_FLOW_FLAG_##flag)

static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
{
	bool ret = test_bit(flag, &flow->flags);

	/* Read fields of flow structure only after checking flags. */
	smp_mb__after_atomic();
	return ret;
}

#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
						    MLX5E_TC_FLOW_FLAG_##flag)

static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, ESWITCH);
}

static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, OFFLOADED);
}
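
/* Usage note: the flow_flag_*() macros paste the short flag name onto the
 * MLX5E_TC_FLOW_FLAG_ prefix, so callers write e.g.:
 *
 *	flow_flag_set(flow, OFFLOADED);
 *	if (flow_flag_test(flow, SLOW))
 *		...
 *
 * which expands to atomic bit operations on flow->flags with the memory
 * barriers documented above.
 */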
static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
{
	return jhash(key->actions,
		     key->num_actions * MLX5_MH_ACT_SZ, 0);
}

static inline int cmp_mod_hdr_info(struct mod_hdr_key *a,
				   struct mod_hdr_key *b)
{
	if (a->num_actions != b->num_actions)
		return 1;

	return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
}
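
/* Note: the hash and compare above operate on the raw modify-header action
 * array, so two flows share a cached entry only when their action lists are
 * byte-identical, including ordering. An illustrative lookup key (using the
 * parse_attr fields defined earlier in this file):
 *
 *	struct mod_hdr_key key = {
 *		.actions     = parse_attr->mod_hdr_actions,
 *		.num_actions = parse_attr->num_mod_hdr_actions,
 *	};
 *	u32 hash_key = hash_mod_hdr_info(&key);
 */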
static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, int namespace)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr :
		&priv->fs.tc.mod_hdr;
}
static struct mlx5e_mod_hdr_entry *
mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key)
{
	struct mlx5e_mod_hdr_entry *mh, *found = NULL;

	hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
		if (!cmp_mod_hdr_info(&mh->key, key)) {
			refcount_inc(&mh->refcnt);
			found = mh;
			break;
		}
	}

	return found;
}
static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
			      struct mlx5e_mod_hdr_entry *mh,
			      int namespace)
{
	struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace);

	if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
		return;
	hash_del(&mh->mod_hdr_hlist);
	mutex_unlock(&tbl->lock);

	WARN_ON(!list_empty(&mh->flows));
	if (mh->compl_result > 0)
		mlx5_modify_header_dealloc(priv->mdev, mh->modify_hdr);

	kfree(mh);
}
static int get_flow_name_space(struct mlx5e_tc_flow *flow)
{
	return mlx5e_is_eswitch_flow(flow) ?
		MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
}
static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
				struct mlx5e_tc_flow *flow,
				struct mlx5e_tc_flow_parse_attr *parse_attr)
{
	int num_actions, actions_size, namespace, err;
	struct mlx5e_mod_hdr_entry *mh;
	struct mod_hdr_tbl *tbl;
	struct mod_hdr_key key;
	u32 hash_key;

	num_actions  = parse_attr->num_mod_hdr_actions;
	actions_size = MLX5_MH_ACT_SZ * num_actions;

	key.actions = parse_attr->mod_hdr_actions;
	key.num_actions = num_actions;

	hash_key = hash_mod_hdr_info(&key);

	namespace = get_flow_name_space(flow);
	tbl = get_mod_hdr_table(priv, namespace);

	mutex_lock(&tbl->lock);
	mh = mlx5e_mod_hdr_get(tbl, &key, hash_key);
	if (mh) {
		mutex_unlock(&tbl->lock);
		wait_for_completion(&mh->res_ready);

		if (mh->compl_result < 0) {
			err = -EREMOTEIO;
			goto attach_header_err;
		}
		goto attach_flow;
	}

	mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
	if (!mh) {
		mutex_unlock(&tbl->lock);
		return -ENOMEM;
	}

	mh->key.actions = (void *)mh + sizeof(*mh);
	memcpy(mh->key.actions, key.actions, actions_size);
	mh->key.num_actions = num_actions;
	spin_lock_init(&mh->flows_lock);
	INIT_LIST_HEAD(&mh->flows);
	refcount_set(&mh->refcnt, 1);
	init_completion(&mh->res_ready);

	hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
	mutex_unlock(&tbl->lock);

	mh->modify_hdr = mlx5_modify_header_alloc(priv->mdev, namespace,
						  mh->key.num_actions,
						  mh->key.actions);
	if (IS_ERR(mh->modify_hdr)) {
		err = PTR_ERR(mh->modify_hdr);
		mh->compl_result = err;
		goto alloc_header_err;
	}
	mh->compl_result = 1;
	complete_all(&mh->res_ready);

attach_flow:
	flow->mh = mh;
	spin_lock(&mh->flows_lock);
	list_add(&flow->mod_hdr, &mh->flows);
	spin_unlock(&mh->flows_lock);
	if (mlx5e_is_eswitch_flow(flow))
		flow->esw_attr->modify_hdr = mh->modify_hdr;
	else
		flow->nic_attr->modify_hdr = mh->modify_hdr;

	return 0;

alloc_header_err:
	complete_all(&mh->res_ready);
attach_header_err:
	mlx5e_mod_hdr_put(priv, mh, namespace);
	return err;
}
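
/* The attach path above follows a get-or-create pattern that is repeated for
 * hairpin entries later in this file: lookup under the table lock takes a
 * reference and waits on res_ready; on a miss, the new entry is published in
 * the hash table *before* the firmware object is allocated, and compl_result
 * records the outcome for concurrent waiters. Illustrative summary of the
 * code above, not a separate implementation:
 *
 *	mutex_lock(&tbl->lock);
 *	mh = mlx5e_mod_hdr_get(tbl, &key, hash_key);	// hit: refcnt++
 *	...						// miss: kzalloc + hash_add
 *	mutex_unlock(&tbl->lock);
 *	...						// allocate FW object, then
 *	complete_all(&mh->res_ready);			// wake concurrent waiters
 */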
static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
				 struct mlx5e_tc_flow *flow)
{
	/* flow wasn't fully initialized */
	if (!flow->mh)
		return;

	spin_lock(&flow->mh->flows_lock);
	list_del(&flow->mod_hdr);
	spin_unlock(&flow->mh->flows_lock);

	mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow));
	flow->mh = NULL;
}
static
struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
	struct net_device *netdev;
	struct mlx5e_priv *priv;

	netdev = __dev_get_by_index(net, ifindex);
	priv = netdev_priv(netdev);
	return priv->mdev;
}
static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
	u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {0};
	void *tirc;
	int err;

	err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
	if (err)
		goto alloc_tdn_err;

	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
	MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
	MLX5_SET(tirc, tirc, transport_domain, hp->tdn);

	err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn);
	if (err)
		goto create_tir_err;

	return 0;

create_tir_err:
	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
alloc_tdn_err:
	return err;
}
static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
	mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}
static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
{
	u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
	struct mlx5e_priv *priv = hp->func_priv;
	int i, ix, sz = MLX5E_INDIR_RQT_SIZE;

	mlx5e_build_default_indir_rqt(indirection_rqt, sz,
				      hp->num_channels);

	for (i = 0; i < sz; i++) {
		ix = i;
		if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
			ix = mlx5e_bits_invert(i, ilog2(sz));
		ix = indirection_rqt[ix];
		rqn = hp->pair->rqn[ix];
		MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
	}
}
static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
	int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
	struct mlx5e_priv *priv = hp->func_priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	void *rqtc;
	u32 *in;

	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);

	mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);

	err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
	if (!err)
		hp->indir_rqt.enabled = true;

	kvfree(in);
	return err;
}
static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	u32 in[MLX5_ST_SZ_DW(create_tir_in)];
	int tt, i, err;
	void *tirc;

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
		struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);

		memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
		tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

		MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
		MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
		MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
		mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);

		err = mlx5_core_create_tir(hp->func_mdev, in,
					   MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
		if (err) {
			mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
			goto err_destroy_tirs;
		}
	}
	return 0;

err_destroy_tirs:
	for (i = 0; i < tt; i++)
		mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
	return err;
}
static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
{
	int tt;

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
		mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
}
static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
					 struct ttc_params *ttc_params)
{
	struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
	int tt;

	memset(ttc_params, 0, sizeof(*ttc_params));

	ttc_params->any_tt_tirn = hp->tirn;

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
		ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];

	ft_attr->max_fte = MLX5E_NUM_TT;
	ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
	ft_attr->prio = MLX5E_TC_PRIO;
}
static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	struct ttc_params ttc_params;
	int err;

	err = mlx5e_hairpin_create_indirect_rqt(hp);
	if (err)
		return err;

	err = mlx5e_hairpin_create_indirect_tirs(hp);
	if (err)
		goto err_create_indirect_tirs;

	mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
	err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
	if (err)
		goto err_create_ttc_table;

	netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
		   hp->num_channels, hp->ttc.ft.t->id);

	return 0;

err_create_ttc_table:
	mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
	mlx5e_destroy_rqt(priv, &hp->indir_rqt);

	return err;
}
static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;

	mlx5e_destroy_ttc_table(priv, &hp->ttc);
	mlx5e_hairpin_destroy_indirect_tirs(hp);
	mlx5e_destroy_rqt(priv, &hp->indir_rqt);
}
static struct mlx5e_hairpin *
mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
		     int peer_ifindex)
{
	struct mlx5_core_dev *func_mdev, *peer_mdev;
	struct mlx5e_hairpin *hp;
	struct mlx5_hairpin *pair;
	int err;

	hp = kzalloc(sizeof(*hp), GFP_KERNEL);
	if (!hp)
		return ERR_PTR(-ENOMEM);

	func_mdev = priv->mdev;
	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);

	pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
	if (IS_ERR(pair)) {
		err = PTR_ERR(pair);
		goto create_pair_err;
	}
	hp->pair = pair;
	hp->func_mdev = func_mdev;
	hp->func_priv = priv;
	hp->num_channels = params->num_channels;

	err = mlx5e_hairpin_create_transport(hp);
	if (err)
		goto create_transport_err;

	if (hp->num_channels > 1) {
		err = mlx5e_hairpin_rss_init(hp);
		if (err)
			goto rss_init_err;
	}

	return hp;

rss_init_err:
	mlx5e_hairpin_destroy_transport(hp);
create_transport_err:
	mlx5_core_hairpin_destroy(hp->pair);
create_pair_err:
	kfree(hp);
	return ERR_PTR(err);
}
static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
{
	if (hp->num_channels > 1)
		mlx5e_hairpin_rss_cleanup(hp);
	mlx5e_hairpin_destroy_transport(hp);
	mlx5_core_hairpin_destroy(hp->pair);
}
static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
{
	return (peer_vhca_id << 16 | prio);
}
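
/* For example, peer_vhca_id 0x12 and prio 3 hash to 0x00120003: the prio
 * occupies the low 16 bits and the vhca id the high 16, so distinct
 * (vhca, prio) pairs map to distinct keys.
 */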
static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
						     u16 peer_vhca_id, u8 prio)
{
	struct mlx5e_hairpin_entry *hpe;
	u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);

	hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
			       hairpin_hlist, hash_key) {
		if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
			refcount_inc(&hpe->refcnt);
			return hpe;
		}
	}

	return NULL;
}
static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
			      struct mlx5e_hairpin_entry *hpe)
{
	/* no more hairpin flows for us, release the hairpin pair */
	if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
		return;
	hash_del(&hpe->hairpin_hlist);
	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

	if (!IS_ERR_OR_NULL(hpe->hp)) {
		netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
			   dev_name(hpe->hp->pair->peer_mdev->device));

		mlx5e_hairpin_destroy(hpe->hp);
	}

	WARN_ON(!list_empty(&hpe->flows));
	kfree(hpe);
}
#define UNKNOWN_MATCH_PRIO 8

static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
				  struct mlx5_flow_spec *spec, u8 *match_prio,
				  struct netlink_ext_ack *extack)
{
	void *headers_c, *headers_v;
	u8 prio_val, prio_mask = 0;
	bool vlan_present;

#ifdef CONFIG_MLX5_CORE_EN_DCB
	if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
		NL_SET_ERR_MSG_MOD(extack,
				   "only PCP trust state supported for hairpin");
		return -EOPNOTSUPP;
	}
#endif
	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);

	vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
	if (vlan_present) {
		prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
		prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
	}

	if (!vlan_present || !prio_mask) {
		prio_val = UNKNOWN_MATCH_PRIO;
	} else if (prio_mask != 0x7) {
		NL_SET_ERR_MSG_MOD(extack,
				   "masked priority match not supported for hairpin");
		return -EOPNOTSUPP;
	}

	*match_prio = prio_val;
	return 0;
}
static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow,
				  struct mlx5e_tc_flow_parse_attr *parse_attr,
				  struct netlink_ext_ack *extack)
{
	int peer_ifindex = parse_attr->mirred_ifindex[0];
	struct mlx5_hairpin_params params;
	struct mlx5_core_dev *peer_mdev;
	struct mlx5e_hairpin_entry *hpe;
	struct mlx5e_hairpin *hp;
	u64 link_speed64;
	u32 link_speed;
	u8 match_prio;
	u16 peer_id;
	int err;

	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
	if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
		NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
		return -EOPNOTSUPP;
	}

	peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
	err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
				     extack);
	if (err)
		return err;

	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
	hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
	if (hpe) {
		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
		wait_for_completion(&hpe->res_ready);

		if (IS_ERR(hpe->hp)) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
	if (!hpe) {
		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
		return -ENOMEM;
	}

	spin_lock_init(&hpe->flows_lock);
	INIT_LIST_HEAD(&hpe->flows);
	INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
	hpe->peer_vhca_id = peer_id;
	hpe->prio = match_prio;
	refcount_set(&hpe->refcnt, 1);
	init_completion(&hpe->res_ready);

	hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
		 hash_hairpin_info(peer_id, match_prio));
	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

	params.log_data_size = 15;
	params.log_data_size = min_t(u8, params.log_data_size,
				     MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
	params.log_data_size = max_t(u8, params.log_data_size,
				     MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));

	params.log_num_packets = params.log_data_size -
				 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
	params.log_num_packets = min_t(u8, params.log_num_packets,
				       MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));

	params.q_counter = priv->q_counter;
	/* set hairpin pair per each 50Gbs share of the link */
	mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
	link_speed = max_t(u32, link_speed, 50000);
	link_speed64 = link_speed;
	do_div(link_speed64, 50000);
	params.num_channels = link_speed64;

	hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
	hpe->hp = hp;
	complete_all(&hpe->res_ready);
	if (IS_ERR(hp)) {
		err = PTR_ERR(hp);
		goto out_err;
	}

	netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
		   hp->tirn, hp->pair->rqn[0],
		   dev_name(hp->pair->peer_mdev->device),
		   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);

attach_flow:
	if (hpe->hp->num_channels > 1) {
		flow_flag_set(flow, HAIRPIN_RSS);
		flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
	} else {
		flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
	}

	flow->hpe = hpe;
	spin_lock(&hpe->flows_lock);
	list_add(&flow->hairpin, &hpe->flows);
	spin_unlock(&hpe->flows_lock);

	return 0;

out_err:
	mlx5e_hairpin_put(priv, hpe);
	return err;
}
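
/* Worked example for the channel computation above: on a 100Gbs port,
 * mlx5e_port_max_linkspeed() reports link_speed = 100000 (Mbs), so
 * 100000 / 50000 = 2 hairpin channels. Anything at or below 50Gbs gets a
 * single channel because of the max_t() clamp before the division.
 */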
static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
				   struct mlx5e_tc_flow *flow)
{
	/* flow wasn't fully initialized */
	if (!flow->hpe)
		return;

	spin_lock(&flow->hpe->flows_lock);
	list_del(&flow->hairpin);
	spin_unlock(&flow->hpe->flows_lock);

	mlx5e_hairpin_put(priv, flow->hpe);
	flow->hpe = NULL;
}
static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow_parse_attr *parse_attr,
		      struct mlx5e_tc_flow *flow,
		      struct netlink_ext_ack *extack)
{
	struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
	struct mlx5_nic_flow_attr *attr = flow->nic_attr;
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_flow_destination dest[2] = {};
	struct mlx5_flow_act flow_act = {
		.action = attr->action,
		.flags  = FLOW_ACT_NO_APPEND,
	};
	struct mlx5_fc *counter = NULL;
	int err, dest_ix = 0;

	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
	flow_context->flow_tag = attr->flow_tag;

	if (flow_flag_test(flow, HAIRPIN)) {
		err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
		if (err)
			return err;

		if (flow_flag_test(flow, HAIRPIN_RSS)) {
			dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
			dest[dest_ix].ft = attr->hairpin_ft;
		} else {
			dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
			dest[dest_ix].tir_num = attr->hairpin_tirn;
		}
		dest_ix++;
	} else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest[dest_ix].ft = priv->fs.vlan.ft.t;
		dest_ix++;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		counter = mlx5_fc_create(dev, true);
		if (IS_ERR(counter))
			return PTR_ERR(counter);

		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
		dest[dest_ix].counter_id = mlx5_fc_id(counter);
		dest_ix++;
		attr->counter = counter;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
		flow_act.modify_hdr = attr->modify_hdr;
		kfree(parse_attr->mod_hdr_actions);
		if (err)
			return err;
	}

	mutex_lock(&priv->fs.tc.t_lock);
	if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
		int tc_grp_size, tc_tbl_size;
		u32 max_flow_counter;

		max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
				    MLX5_CAP_GEN(dev, max_flow_counter_15_0);

		tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);

		tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
				    BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));

		priv->fs.tc.t =
			mlx5_create_auto_grouped_flow_table(priv->fs.ns,
							    MLX5E_TC_PRIO,
							    tc_tbl_size,
							    MLX5E_TC_TABLE_NUM_GROUPS,
							    MLX5E_TC_FT_LEVEL, 0);
		if (IS_ERR(priv->fs.tc.t)) {
			mutex_unlock(&priv->fs.tc.t_lock);
			NL_SET_ERR_MSG_MOD(extack,
					   "Failed to create tc offload table");
			netdev_err(priv->netdev,
				   "Failed to create tc offload table\n");
			return PTR_ERR(priv->fs.tc.t);
		}
	}

	if (attr->match_level != MLX5_MATCH_NONE)
		parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

	flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
					    &flow_act, dest, dest_ix);
	mutex_unlock(&priv->fs.tc.t_lock);

	return PTR_ERR_OR_ZERO(flow->rule[0]);
}
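
/* Sizing example for the table created above: with
 * MLX5E_TC_TABLE_MAX_GROUP_SIZE = BIT(16) and 4 groups, a device exposing at
 * least 2^16 flow counters and log_max_ft_size >= 18 gets
 * tc_tbl_size = 4 * 65536 = 262144 entries; smaller counter or flow-table
 * caps shrink it through the two min_t() clamps.
 */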
static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow)
{
	struct mlx5_nic_flow_attr *attr = flow->nic_attr;
	struct mlx5_fc *counter = NULL;

	counter = attr->counter;
	if (!IS_ERR_OR_NULL(flow->rule[0]))
		mlx5_del_flow_rules(flow->rule[0]);
	mlx5_fc_destroy(priv->mdev, counter);

	mutex_lock(&priv->fs.tc.t_lock);
	if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) {
		mlx5_destroy_flow_table(priv->fs.tc.t);
		priv->fs.tc.t = NULL;
	}
	mutex_unlock(&priv->fs.tc.t_lock);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		mlx5e_detach_mod_hdr(priv, flow);

	if (flow_flag_test(flow, HAIRPIN))
		mlx5e_hairpin_flow_del(priv, flow);
}
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
			       struct mlx5e_tc_flow *flow, int out_index);

static int mlx5e_attach_encap(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow,
			      struct net_device *mirred_dev,
			      int out_index,
			      struct netlink_ext_ack *extack,
			      struct net_device **encap_dev,
			      bool *encap_valid);
static struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
			   struct mlx5e_tc_flow *flow,
			   struct mlx5_flow_spec *spec,
			   struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_flow_handle *rule;

	rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
	if (IS_ERR(rule))
		return rule;

	if (attr->split_count) {
		flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
		if (IS_ERR(flow->rule[1])) {
			mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
			return flow->rule[1];
		}
	}

	return rule;
}
static void
mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
			     struct mlx5e_tc_flow *flow,
			     struct mlx5_esw_flow_attr *attr)
{
	flow_flag_clear(flow, OFFLOADED);

	if (attr->split_count)
		mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);

	mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
}
static struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
			      struct mlx5e_tc_flow *flow,
			      struct mlx5_flow_spec *spec,
			      struct mlx5_esw_flow_attr *slow_attr)
{
	struct mlx5_flow_handle *rule;

	memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	slow_attr->split_count = 0;
	slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;

	rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
	if (!IS_ERR(rule))
		flow_flag_set(flow, SLOW);

	return rule;
}
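
/* The slow path reuses the flow's attributes but overwrites the action with a
 * plain forward to FDB_SLOW_PATH_CHAIN, so traffic reaches software and the
 * kernel datapath can resolve the neighbour while the encap entry is not yet
 * valid; mlx5e_tc_encap_flows_add() below swaps the rule back to the fast
 * path once the encapsulation header is offloaded.
 */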
static void
mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
				  struct mlx5e_tc_flow *flow,
				  struct mlx5_esw_flow_attr *slow_attr)
{
	memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	slow_attr->split_count = 0;
	slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
	mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
	flow_flag_clear(flow, SLOW);
}
/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_add(struct mlx5e_tc_flow *flow,
			     struct list_head *unready_flows)
{
	flow_flag_set(flow, NOT_READY);
	list_add_tail(&flow->unready, unready_flows);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_del(struct mlx5e_tc_flow *flow)
{
	list_del(&flow->unready);
	flow_flag_clear(flow, NOT_READY);
}

static void add_unready_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &rpriv->uplink_priv;

	mutex_lock(&uplink_priv->unready_flows_lock);
	unready_flow_add(flow, &uplink_priv->unready_flows);
	mutex_unlock(&uplink_priv->unready_flows_lock);
}

static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &rpriv->uplink_priv;

	mutex_lock(&uplink_priv->unready_flows_lock);
	unready_flow_del(flow);
	mutex_unlock(&uplink_priv->unready_flows_lock);
}
static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow *flow,
		      struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	u32 max_chain = mlx5_eswitch_get_chain_range(esw);
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
	struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
	u16 max_prio = mlx5_eswitch_get_prio_range(esw);
	struct net_device *out_dev, *encap_dev = NULL;
	struct mlx5_fc *counter = NULL;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *out_priv;
	bool encap_valid = true;
	int err = 0;
	int out_index;

	if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) {
		NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW");
		return -EOPNOTSUPP;
	}

	if (attr->chain > max_chain) {
		NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
		return -EOPNOTSUPP;
	}

	if (attr->prio > max_prio) {
		NL_SET_ERR_MSG(extack, "Requested priority is out of supported range");
		return -EOPNOTSUPP;
	}

	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
		int mirred_ifindex;

		if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
			continue;

		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
		out_dev = __dev_get_by_index(dev_net(priv->netdev),
					     mirred_ifindex);
		err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
					 extack, &encap_dev, &encap_valid);
		if (err)
			return err;

		out_priv = netdev_priv(encap_dev);
		rpriv = out_priv->ppriv;
		attr->dests[out_index].rep = rpriv->rep;
		attr->dests[out_index].mdev = out_priv->mdev;
	}

	err = mlx5_eswitch_add_vlan_action(esw, attr);
	if (err)
		return err;

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
		kfree(parse_attr->mod_hdr_actions);
		if (err)
			return err;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		counter = mlx5_fc_create(attr->counter_dev, true);
		if (IS_ERR(counter))
			return PTR_ERR(counter);

		attr->counter = counter;
	}

	/* we get here if one of the following takes place:
	 * (1) there's no error
	 * (2) there's an encap action and we don't have valid neigh
	 */
	if (!encap_valid) {
		/* continue with goto slow path rule instead */
		struct mlx5_esw_flow_attr slow_attr;

		flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr);
	} else {
		flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
	}

	if (IS_ERR(flow->rule[0]))
		return PTR_ERR(flow->rule[0]);
	else
		flow_flag_set(flow, OFFLOADED);

	return 0;
}
static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
{
	struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec;
	void *headers_v = MLX5_ADDR_OF(fte_match_param,
				       spec->match_value,
				       misc_parameters_3);
	u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
					     headers_v,
					     geneve_tlv_option_0_data);

	return !!geneve_tlv_opt_0_data;
}
static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
	struct mlx5_esw_flow_attr slow_attr;
	int out_index;

	if (flow_flag_test(flow, NOT_READY)) {
		remove_unready_flow(flow);
		kvfree(attr->parse_attr);
		return;
	}

	if (mlx5e_is_offloaded_flow(flow)) {
		if (flow_flag_test(flow, SLOW))
			mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
		else
			mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
	}

	if (mlx5_flow_has_geneve_opt(flow))
		mlx5_geneve_tlv_option_del(priv->mdev->geneve);

	mlx5_eswitch_del_vlan_action(esw, attr);

	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
		if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
			mlx5e_detach_encap(priv, flow, out_index);
	kvfree(attr->parse_attr);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		mlx5e_detach_mod_hdr(priv, flow);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
		mlx5_fc_destroy(attr->counter_dev, attr->counter);
}
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr slow_attr, *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     e->reformat_type,
						     e->encap_size, e->encap_header,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
			       PTR_ERR(e->pkt_reformat));
		return;
	}
	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, flow_list, tmp_list) {
		bool all_flow_encaps_valid = true;
		int i;

		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		esw_attr = flow->esw_attr;
		spec = &esw_attr->parse_attr->spec;

		esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
		esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
		/* Flow can be associated with multiple encap entries.
		 * Before offloading the flow verify that all of them have
		 * a valid neighbour.
		 */
		for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
			if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
				continue;
			if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
				all_flow_encaps_valid = false;
				break;
			}
		}
		/* Do not offload flows with unresolved neighbors */
		if (!all_flow_encaps_valid)
			continue;
		/* update from slow path rule to encap rule */
		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
		flow->rule[0] = rule;
		/* was unset when slow path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}
}
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr slow_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		spec = &flow->esw_attr->parse_attr->spec;

		/* update from encap rule to slow path rule */
		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr);
		/* mark the flow's encap dest as non-valid */
		flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;

		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
		flow->rule[0] = rule;
		/* was unset when fast path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}

	/* we know that the encap is valid */
	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}
static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
{
	if (mlx5e_is_eswitch_flow(flow))
		return flow->esw_attr->counter;
	else
		return flow->nic_attr->counter;
}
/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
	struct encap_flow_item *efi;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(efi, &e->flows, list) {
		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
		if (IS_ERR(mlx5e_flow_get(flow)))
			continue;
		wait_for_completion(&flow->init_done);

		flow->tmp_efi_index = efi->index;
		list_add(&flow->tmp_list, flow_list);
	}
}
/* Iterate over tmp_list of flows attached to flow_list head. */
void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
{
	struct mlx5e_tc_flow *flow, *tmp;

	list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
		mlx5e_flow_put(priv, flow);
}
static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
			   struct mlx5e_encap_entry *e)
{
	struct mlx5e_encap_entry *next = NULL;

retry:
	rcu_read_lock();

	/* find encap with non-zero reference counter value */
	for (next = e ?
		     list_next_or_null_rcu(&nhe->encap_list,
					   &e->encap_list,
					   struct mlx5e_encap_entry,
					   encap_list) :
		     list_first_or_null_rcu(&nhe->encap_list,
					    struct mlx5e_encap_entry,
					    encap_list);
	     next;
	     next = list_next_or_null_rcu(&nhe->encap_list,
					  &next->encap_list,
					  struct mlx5e_encap_entry,
					  encap_list))
		if (mlx5e_encap_take(next))
			break;

	rcu_read_unlock();

	/* release starting encap */
	if (e)
		mlx5e_encap_put(netdev_priv(e->out_dev), e);
	if (!next)
		return next;

	/* wait for encap to be fully initialized */
	wait_for_completion(&next->res_ready);
	/* continue searching if encap entry is not in valid state after completion */
	if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
		e = next;
		goto retry;
	}

	return next;
}
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	struct mlx5e_encap_entry *e = NULL;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;
	u64 lastuse;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	/* mlx5e_get_next_valid_encap() releases previous encap before returning
	 * next one.
	 */
	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
		struct encap_flow_item *efi, *tmp;
		struct mlx5_eswitch *esw;
		LIST_HEAD(flow_list);

		esw = priv->mdev->priv.eswitch;
		mutex_lock(&esw->offloads.encap_tbl_lock);
		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
			flow = container_of(efi, struct mlx5e_tc_flow,
					    encaps[efi->index]);
			if (IS_ERR(mlx5e_flow_get(flow)))
				continue;
			list_add(&flow->tmp_list, &flow_list);

			if (mlx5e_is_offloaded_flow(flow)) {
				counter = mlx5e_tc_get_counter(flow);
				lastuse = mlx5_fc_query_lastuse(counter);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
		mutex_unlock(&esw->offloads.encap_tbl_lock);

		mlx5e_put_encap_flow_list(priv, &flow_list);
		if (neigh_used) {
			/* release current encap before breaking the loop */
			mlx5e_encap_put(priv, e);
			break;
		}
	}

	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
		if (!n)
			return;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}
static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	WARN_ON(!list_empty(&e->flows));

	if (e->compl_result > 0) {
		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	}

	kfree(e->encap_header);
	kfree(e);
}
void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
		return;
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
			       struct mlx5e_tc_flow *flow, int out_index)
{
	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	/* flow wasn't fully initialized */
	if (!e)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->encaps[out_index].list);
	flow->encaps[out_index].e = NULL;
	if (!refcount_dec_and_test(&e->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}
static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;

	if (!flow_flag_test(flow, ESWITCH) ||
	    !flow_flag_test(flow, DUP))
		return;

	mutex_lock(&esw->offloads.peer_mutex);
	list_del(&flow->peer);
	mutex_unlock(&esw->offloads.peer_mutex);

	flow_flag_clear(flow, DUP);

	mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
	kvfree(flow->peer_flow);
	flow->peer_flow = NULL;
}
static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_core_dev *dev = flow->priv->mdev;
	struct mlx5_devcom *devcom = dev->priv.devcom;
	struct mlx5_eswitch *peer_esw;

	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		return;

	__mlx5e_tc_del_fdb_peer_flow(flow);
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
}
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	if (mlx5e_is_eswitch_flow(flow)) {
		mlx5e_tc_del_fdb_peer_flow(flow);
		mlx5e_tc_del_fdb_flow(priv, flow);
	} else {
		mlx5e_tc_del_nic_flow(priv, flow);
	}
}
static int parse_tunnel_attr(struct mlx5e_priv *priv,
			     struct mlx5_flow_spec *spec,
			     struct flow_cls_offload *f,
			     struct net_device *filter_dev, u8 *match_level)
{
	struct netlink_ext_ack *extack = f->common.extack;
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	int err;

	err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
				 headers_c, headers_v, match_level);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "failed to parse tunnel attributes");
		return err;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_match_control match;
		u16 addr_type;

		flow_rule_match_enc_control(rule, &match);
		addr_type = match.key->addr_type;

		/* For tunnel addr_type used same key IDs as for non-tunnel */
		if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
			struct flow_match_ipv4_addrs match;

			flow_rule_match_enc_ipv4_addrs(rule, &match);
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 src_ipv4_src_ipv6.ipv4_layout.ipv4,
				 ntohl(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 src_ipv4_src_ipv6.ipv4_layout.ipv4,
				 ntohl(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
				 ntohl(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
				 ntohl(match.key->dst));

			MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
					 ethertype);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
				 ETH_P_IP);
		} else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
			struct flow_match_ipv6_addrs match;

			flow_rule_match_enc_ipv6_addrs(rule, &match);
			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					    src_ipv4_src_ipv6.ipv6_layout.ipv6),
			       &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
								   ipv6));
			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					    src_ipv4_src_ipv6.ipv6_layout.ipv6),
			       &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
								  ipv6));

			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
			       &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
								   ipv6));
			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
			       &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
								  ipv6));

			MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
					 ethertype);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
				 ETH_P_IPV6);
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
		struct flow_match_ip match;

		flow_rule_match_enc_ip(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
			 match.mask->tos & 0x3);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
			 match.key->tos & 0x3);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
			 match.mask->tos >> 2);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
			 match.key->tos >> 2);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
			 match.mask->ttl);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
			 match.key->ttl);

		if (match.mask->ttl &&
		    !MLX5_CAP_ESW_FLOWTABLE_FDB
			(priv->mdev,
			 ft_field_support.outer_ipv4_ttl)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Matching on TTL is not supported");
			return -EOPNOTSUPP;
		}
	}

	/* Enforce DMAC when offloading incoming tunneled flows.
	 * Flow counters require a match on the DMAC.
	 */
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
	ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				     dmac_47_16), priv->netdev->dev_addr);

	/* let software handle IP fragments */
	MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);

	return 0;
}
static void *get_match_headers_criteria(u32 flags,
					struct mlx5_flow_spec *spec)
{
	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
		MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
			     inner_headers) :
		MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
			     outer_headers);
}

static void *get_match_headers_value(u32 flags,
				     struct mlx5_flow_spec *spec)
{
	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
		MLX5_ADDR_OF(fte_match_param, spec->match_value,
			     inner_headers) :
		MLX5_ADDR_OF(fte_match_param, spec->match_value,
			     outer_headers);
}
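
/* For decap (tunnel) flows the TC match keys describe the *inner* packet, so
 * callers pass MLX5_FLOW_CONTEXT_ACTION_DECAP to select the inner_headers
 * block, e.g.:
 *
 *	headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP,
 *					    spec);
 *
 * Flows without decap match on outer_headers.
 */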
1791 static int __parse_cls_flower(struct mlx5e_priv
*priv
,
1792 struct mlx5_flow_spec
*spec
,
1793 struct flow_cls_offload
*f
,
1794 struct net_device
*filter_dev
,
1795 u8
*inner_match_level
, u8
*outer_match_level
)
1797 struct netlink_ext_ack
*extack
= f
->common
.extack
;
1798 void *headers_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
1800 void *headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
1802 void *misc_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
1804 void *misc_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
1806 struct flow_rule
*rule
= flow_cls_offload_flow_rule(f
);
1807 struct flow_dissector
*dissector
= rule
->match
.dissector
;
1812 match_level
= outer_match_level
;
1814 if (dissector
->used_keys
&
1815 ~(BIT(FLOW_DISSECTOR_KEY_META
) |
1816 BIT(FLOW_DISSECTOR_KEY_CONTROL
) |
1817 BIT(FLOW_DISSECTOR_KEY_BASIC
) |
1818 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS
) |
1819 BIT(FLOW_DISSECTOR_KEY_VLAN
) |
1820 BIT(FLOW_DISSECTOR_KEY_CVLAN
) |
1821 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS
) |
1822 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS
) |
1823 BIT(FLOW_DISSECTOR_KEY_PORTS
) |
1824 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID
) |
1825 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS
) |
1826 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS
) |
1827 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS
) |
1828 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL
) |
1829 BIT(FLOW_DISSECTOR_KEY_TCP
) |
1830 BIT(FLOW_DISSECTOR_KEY_IP
) |
1831 BIT(FLOW_DISSECTOR_KEY_ENC_IP
) |
1832 BIT(FLOW_DISSECTOR_KEY_ENC_OPTS
))) {
1833 NL_SET_ERR_MSG_MOD(extack
, "Unsupported key");
1834 netdev_warn(priv
->netdev
, "Unsupported key used: 0x%x\n",
1835 dissector
->used_keys
);
1839 if (mlx5e_get_tc_tun(filter_dev
)) {
1840 if (parse_tunnel_attr(priv
, spec
, f
, filter_dev
,
1844 /* At this point, header pointers should point to the inner
1845 * headers, outer header were already set by parse_tunnel_attr
1847 match_level
= inner_match_level
;
1848 headers_c
= get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP
,
1850 headers_v
= get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP
,
1854 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_BASIC
)) {
1855 struct flow_match_basic match
;
1857 flow_rule_match_basic(rule
, &match
);
1858 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ethertype
,
1859 ntohs(match
.mask
->n_proto
));
1860 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ethertype
,
1861 ntohs(match
.key
->n_proto
));
1863 if (match
.mask
->n_proto
)
1864 *match_level
= MLX5_MATCH_L2
;
1866 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_VLAN
) ||
1867 is_vlan_dev(filter_dev
)) {
1868 struct flow_dissector_key_vlan filter_dev_mask
;
1869 struct flow_dissector_key_vlan filter_dev_key
;
1870 struct flow_match_vlan match
;
1872 if (is_vlan_dev(filter_dev
)) {
1873 match
.key
= &filter_dev_key
;
1874 match
.key
->vlan_id
= vlan_dev_vlan_id(filter_dev
);
1875 match
.key
->vlan_tpid
= vlan_dev_vlan_proto(filter_dev
);
1876 match
.key
->vlan_priority
= 0;
1877 match
.mask
= &filter_dev_mask
;
1878 memset(match
.mask
, 0xff, sizeof(*match
.mask
));
1879 match
.mask
->vlan_priority
= 0;
1881 flow_rule_match_vlan(rule
, &match
);
1883 if (match
.mask
->vlan_id
||
1884 match
.mask
->vlan_priority
||
1885 match
.mask
->vlan_tpid
) {
1886 if (match
.key
->vlan_tpid
== htons(ETH_P_8021AD
)) {
1887 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
1889 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
1892 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
1894 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
1898 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, first_vid
,
1899 match
.mask
->vlan_id
);
1900 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, first_vid
,
1901 match
.key
->vlan_id
);
1903 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, first_prio
,
1904 match
.mask
->vlan_priority
);
1905 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, first_prio
,
1906 match
.key
->vlan_priority
);
1908 *match_level
= MLX5_MATCH_L2
;
1910 } else if (*match_level
!= MLX5_MATCH_NONE
) {
1911 /* cvlan_tag enabled in match criteria and
1912 * disabled in match value means both S & C tags
1913 * don't exist (untagged of both)
1915 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, cvlan_tag
, 1);
1916 *match_level
= MLX5_MATCH_L2
;
1919 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_CVLAN
)) {
1920 struct flow_match_vlan match
;
1922 flow_rule_match_cvlan(rule
, &match
);
1923 if (match
.mask
->vlan_id
||
1924 match
.mask
->vlan_priority
||
1925 match
.mask
->vlan_tpid
) {
1926 if (match
.key
->vlan_tpid
== htons(ETH_P_8021AD
)) {
1927 MLX5_SET(fte_match_set_misc
, misc_c
,
1928 outer_second_svlan_tag
, 1);
1929 MLX5_SET(fte_match_set_misc
, misc_v
,
1930 outer_second_svlan_tag
, 1);
1932 MLX5_SET(fte_match_set_misc
, misc_c
,
1933 outer_second_cvlan_tag
, 1);
1934 MLX5_SET(fte_match_set_misc
, misc_v
,
1935 outer_second_cvlan_tag
, 1);
1938 MLX5_SET(fte_match_set_misc
, misc_c
, outer_second_vid
,
1939 match
.mask
->vlan_id
);
1940 MLX5_SET(fte_match_set_misc
, misc_v
, outer_second_vid
,
1941 match
.key
->vlan_id
);
1942 MLX5_SET(fte_match_set_misc
, misc_c
, outer_second_prio
,
1943 match
.mask
->vlan_priority
);
1944 MLX5_SET(fte_match_set_misc
, misc_v
, outer_second_prio
,
1945 match
.key
->vlan_priority
);
1947 *match_level
= MLX5_MATCH_L2
;
1951 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_ETH_ADDRS
)) {
1952 struct flow_match_eth_addrs match
;
1954 flow_rule_match_eth_addrs(rule
, &match
);
1955 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
1958 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
1962 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
1965 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
1969 if (!is_zero_ether_addr(match
.mask
->src
) ||
1970 !is_zero_ether_addr(match
.mask
->dst
))
1971 *match_level
= MLX5_MATCH_L2
;
1974 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_CONTROL
)) {
1975 struct flow_match_control match
;
1977 flow_rule_match_control(rule
, &match
);
1978 addr_type
= match
.key
->addr_type
;
1980 /* the HW doesn't support frag first/later */
1981 if (match
.mask
->flags
& FLOW_DIS_FIRST_FRAG
)
1984 if (match
.mask
->flags
& FLOW_DIS_IS_FRAGMENT
) {
1985 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, frag
, 1);
1986 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, frag
,
1987 match
.key
->flags
& FLOW_DIS_IS_FRAGMENT
);
1989 /* the HW doesn't need L3 inline to match on frag=no */
1990 if (!(match
.key
->flags
& FLOW_DIS_IS_FRAGMENT
))
1991 *match_level
= MLX5_MATCH_L2
;
1992 /* *** L2 attributes parsing up to here *** */
1994 *match_level
= MLX5_MATCH_L3
;
1998 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_BASIC
)) {
1999 struct flow_match_basic match
;
2001 flow_rule_match_basic(rule
, &match
);
2002 ip_proto
= match
.key
->ip_proto
;
2004 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ip_protocol
,
2005 match
.mask
->ip_proto
);
2006 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ip_protocol
,
2007 match
.key
->ip_proto
);
2009 if (match
.mask
->ip_proto
)
2010 *match_level
= MLX5_MATCH_L3
;
2013 if (addr_type
== FLOW_DISSECTOR_KEY_IPV4_ADDRS
) {
2014 struct flow_match_ipv4_addrs match
;
2016 flow_rule_match_ipv4_addrs(rule
, &match
);
2017 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
2018 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
),
2019 &match
.mask
->src
, sizeof(match
.mask
->src
));
2020 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
2021 src_ipv4_src_ipv6
.ipv4_layout
.ipv4
),
2022 &match
.key
->src
, sizeof(match
.key
->src
));
2023 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
2024 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
),
2025 &match
.mask
->dst
, sizeof(match
.mask
->dst
));
2026 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
2027 dst_ipv4_dst_ipv6
.ipv4_layout
.ipv4
),
2028 &match
.key
->dst
, sizeof(match
.key
->dst
));
2030 if (match
.mask
->src
|| match
.mask
->dst
)
2031 *match_level
= MLX5_MATCH_L3
;
	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.key->src, sizeof(match.key->src));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.key->dst, sizeof(match.key->dst));

		if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
		    ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
			*match_level = MLX5_MATCH_L3;
	}
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
		struct flow_match_ip match;

		flow_rule_match_ip(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
			 match.mask->tos & 0x3);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
			 match.key->tos & 0x3);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
			 match.mask->tos >> 2);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
			 match.key->tos >> 2);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
			 match.mask->ttl);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
			 match.key->ttl);

		if (match.mask->ttl &&
		    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
						ft_field_support.outer_ipv4_ttl)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Matching on TTL is not supported");
			return -EOPNOTSUPP;
		}

		if (match.mask->tos || match.mask->ttl)
			*match_level = MLX5_MATCH_L3;
	}

	/* ***  L3 attributes parsing up to here *** */
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (ip_proto) {
		case IPPROTO_TCP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_dport, ntohs(match.key->dst));
			break;

		case IPPROTO_UDP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_dport, ntohs(match.key->dst));
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack,
					   "Only UDP and TCP transports are supported for L4 matching");
			netdev_err(priv->netdev,
				   "Only UDP and TCP transports are supported\n");
			return -EINVAL;
		}

		if (match.mask->src || match.mask->dst)
			*match_level = MLX5_MATCH_L4;
	}
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
		struct flow_match_tcp match;

		flow_rule_match_tcp(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
			 ntohs(match.mask->flags));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
			 ntohs(match.key->flags));

		if (match.mask->flags)
			*match_level = MLX5_MATCH_L4;
	}

	return 0;
}
static int parse_cls_flower(struct mlx5e_priv *priv,
			    struct mlx5e_tc_flow *flow,
			    struct mlx5_flow_spec *spec,
			    struct flow_cls_offload *f,
			    struct net_device *filter_dev)
{
	u8 inner_match_level, outer_match_level, non_tunnel_match_level;
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch_rep *rep;
	bool is_eswitch_flow;
	int err;

	inner_match_level = MLX5_MATCH_NONE;
	outer_match_level = MLX5_MATCH_NONE;

	err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level,
				 &outer_match_level);
	non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
					outer_match_level : inner_match_level;

	is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
	if (!err && is_eswitch_flow) {
		rep = rpriv->rep;
		if (rep->vport != MLX5_VPORT_UPLINK &&
		    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
		    esw->offloads.inline_mode < non_tunnel_match_level)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Flow is not offloaded due to min inline setting");
			netdev_warn(priv->netdev,
				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
				    non_tunnel_match_level, esw->offloads.inline_mode);
			return -EOPNOTSUPP;
		}
	}

	if (is_eswitch_flow) {
		flow->esw_attr->inner_match_level = inner_match_level;
		flow->esw_attr->outer_match_level = outer_match_level;
	} else {
		flow->nic_attr->match_level = non_tunnel_match_level;
	}

	return err;
}
struct pedit_headers {
	struct ethhdr   eth;
	struct vlan_hdr vlan;
	struct iphdr    ip4;
	struct ipv6hdr  ip6;
	struct tcphdr   tcp;
	struct udphdr   udp;
};

struct pedit_headers_action {
	struct pedit_headers vals;
	struct pedit_headers masks;
	u32 pedits;
};

static int pedit_header_offsets[] = {
	[FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
	[FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
	[FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
	[FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
	[FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
};

#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
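/* Usage sketch (hypothetical local variable h, illustration only):
 * pedit_header() maps a mangle htype to the matching staging header, e.g.
 *
 *	struct pedit_headers_action *h = &hdrs[0];
 *	struct iphdr *ip4 = pedit_header(&h->vals, FLOW_ACT_MANGLE_HDR_TYPE_IP4);
 *
 * leaves ip4 pointing at h->vals.ip4, where IPv4 rewrite values accumulate.
 */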
static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
			 struct pedit_headers_action *hdrs)
{
	u32 *curr_pmask, *curr_pval;

	curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
	curr_pval  = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);

	if (*curr_pmask & mask) /* disallow acting twice on the same location */
		goto out_err;

	*curr_pmask |= mask;
	*curr_pval  |= (val & mask);

	return 0;

out_err:
	return -EOPNOTSUPP;
}
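/* Rough sketch of the accumulation (assumed values, not from this driver):
 * each TC pedit key arrives as a 32-bit val/mask pair at a word-aligned
 * offset; callers hand set_pedit_val() the inverted mask, so the bits being
 * rewritten are ORed into hdrs->masks.<hdr> and the value bits into
 * hdrs->vals.<hdr> at the same offset.  A second key that touches an
 * already-claimed bit trips the (*curr_pmask & mask) test and the pedit is
 * refused for offload.
 */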
struct mlx5_fields {
	u8  field;
	u8  size;
	u32 offset;
	u32 match_offset;
};

#define OFFLOAD(fw_field, size, field, off, match_field) \
		{MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, \
		 offsetof(struct pedit_headers, field) + (off), \
		 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}

/* masked values are the same and there are no rewrites that do not have a
 * match.
 */
#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
	type matchmaskx = *(type *)(matchmaskp); \
	type matchvalx = *(type *)(matchvalp); \
	type maskx = *(type *)(maskp); \
	type valx = *(type *)(valp); \
	\
	(valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
								 matchmaskx)); \
})
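/* Worked example for SAME_VAL_MASK() with u8 operands (numbers are
 * illustrative): rewrite value 0x05 under rewrite mask 0x0f against a flow
 * matching value 0x05 under match mask 0x0f gives
 * (0x05 & 0x0f) == (0x05 & 0x0f) and !(0x0f & (0x0f ^ 0x0f)), i.e. true,
 * so the rewrite is redundant and can be skipped.  With rewrite mask 0xff
 * and match mask 0x0f, 0xff & (0xff ^ 0x0f) = 0xf0 != 0, so the second
 * clause fails: the rewrite touches bits the match does not pin down and
 * must be kept.
 */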
static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
			 void *matchmaskp, int size)
{
	bool same = false;

	switch (size) {
	case sizeof(u8):
		same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
		break;
	case sizeof(u16):
		same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
		break;
	case sizeof(u32):
		same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
		break;
	}

	return same;
}
static struct mlx5_fields fields[] = {
	OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0, dmac_47_16),
	OFFLOAD(DMAC_15_0,  2, eth.h_dest[4], 0, dmac_15_0),
	OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0, smac_47_16),
	OFFLOAD(SMAC_15_0,  2, eth.h_source[4], 0, smac_15_0),
	OFFLOAD(ETHERTYPE,  2, eth.h_proto, 0, ethertype),
	OFFLOAD(FIRST_VID,  2, vlan.h_vlan_TCI, 0, first_vid),

	OFFLOAD(IP_TTL, 1, ip4.ttl,   0, ttl_hoplimit),
	OFFLOAD(SIPV4,  4, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
	OFFLOAD(DIPV4,  4, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),

	OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
	OFFLOAD(SIPV6_95_64,  4, ip6.saddr.s6_addr32[1], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
	OFFLOAD(SIPV6_63_32,  4, ip6.saddr.s6_addr32[2], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
	OFFLOAD(SIPV6_31_0,   4, ip6.saddr.s6_addr32[3], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
	OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
	OFFLOAD(DIPV6_95_64,  4, ip6.daddr.s6_addr32[1], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
	OFFLOAD(DIPV6_63_32,  4, ip6.daddr.s6_addr32[2], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
	OFFLOAD(DIPV6_31_0,   4, ip6.daddr.s6_addr32[3], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
	OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0, ttl_hoplimit),

	OFFLOAD(TCP_SPORT, 2, tcp.source,  0, tcp_sport),
	OFFLOAD(TCP_DPORT, 2, tcp.dest,    0, tcp_dport),
	OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5, tcp_flags),

	OFFLOAD(UDP_SPORT, 2, udp.source, 0, udp_sport),
	OFFLOAD(UDP_DPORT, 2, udp.dest,   0, udp_dport),
};
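/* Reading one table entry (illustration): OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq,
 * 5, tcp_flags) takes a single byte at offset 5 past tcp.ack_seq.  struct
 * tcphdr has no plain flags byte that can be named here, but ack_seq starts
 * at byte 8 of the header, so byte 13 is exactly the wire position of the
 * TCP flags; the entry pairs it with the tcp_flags field of
 * fte_match_set_lyr_2_4.
 */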
/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed
 * at most from the SW pedit action. On success, attr->num_mod_hdr_actions
 * says how many HW actions were actually parsed.
 */
static int offload_pedit_fields(struct pedit_headers_action *hdrs,
				struct mlx5e_tc_flow_parse_attr *parse_attr,
				u32 *action_flags,
				struct netlink_ext_ack *extack)
{
	struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
	void *headers_c = get_match_headers_criteria(*action_flags,
						     &parse_attr->spec);
	void *headers_v = get_match_headers_value(*action_flags,
						  &parse_attr->spec);
	int i, action_size, nactions, max_actions, first, last, next_z;
	void *s_masks_p, *a_masks_p, *vals_p;
	struct mlx5_fields *f;
	u8 cmd, field_bsize;
	u32 s_mask, a_mask;
	unsigned long mask;
	__be32 mask_be32;
	__be16 mask_be16;
	void *action;

	set_masks = &hdrs[0].masks;
	add_masks = &hdrs[1].masks;
	set_vals = &hdrs[0].vals;
	add_vals = &hdrs[1].vals;

	action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
	action = parse_attr->mod_hdr_actions +
		 parse_attr->num_mod_hdr_actions * action_size;

	max_actions = parse_attr->max_mod_hdr_actions;
	nactions = parse_attr->num_mod_hdr_actions;

	for (i = 0; i < ARRAY_SIZE(fields); i++) {
		bool skip;

		f = &fields[i];
		/* avoid seeing bits set from previous iterations */
		s_mask = 0;
		a_mask = 0;

		s_masks_p = (void *)set_masks + f->offset;
		a_masks_p = (void *)add_masks + f->offset;

		memcpy(&s_mask, s_masks_p, f->size);
		memcpy(&a_mask, a_masks_p, f->size);

		if (!s_mask && !a_mask) /* nothing to offload here */
			continue;

		if (s_mask && a_mask) {
			NL_SET_ERR_MSG_MOD(extack,
					   "can't set and add to the same HW field");
			printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
			return -EOPNOTSUPP;
		}

		if (nactions == max_actions) {
			NL_SET_ERR_MSG_MOD(extack,
					   "too many pedit actions, can't offload");
			printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
			return -EOPNOTSUPP;
		}

		skip = false;
		if (s_mask) {
			void *match_mask = headers_c + f->match_offset;
			void *match_val = headers_v + f->match_offset;

			cmd  = MLX5_ACTION_TYPE_SET;
			mask = s_mask;
			vals_p = (void *)set_vals + f->offset;
			/* don't rewrite if we have a match on the same value */
			if (cmp_val_mask(vals_p, s_masks_p, match_val,
					 match_mask, f->size))
				skip = true;
			/* clear to denote we consumed this field */
			memset(s_masks_p, 0, f->size);
		} else {
			u32 zero = 0;

			cmd  = MLX5_ACTION_TYPE_ADD;
			mask = a_mask;
			vals_p = (void *)add_vals + f->offset;
			/* add 0 is no change */
			if (!memcmp(vals_p, &zero, f->size))
				skip = true;
			/* clear to denote we consumed this field */
			memset(a_masks_p, 0, f->size);
		}
		if (skip)
			continue;

		field_bsize = f->size * BITS_PER_BYTE;

		if (field_bsize == 32) {
			mask_be32 = *(__be32 *)&mask;
			mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
		} else if (field_bsize == 16) {
			mask_be16 = *(__be16 *)&mask;
			mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
		}

		first = find_first_bit(&mask, field_bsize);
		next_z = find_next_zero_bit(&mask, field_bsize, first);
		last  = find_last_bit(&mask, field_bsize);
		if (first < next_z && next_z < last) {
			NL_SET_ERR_MSG_MOD(extack,
					   "rewrite of few sub-fields isn't supported");
			printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
			       mask);
			return -EOPNOTSUPP;
		}

		MLX5_SET(set_action_in, action, action_type, cmd);
		MLX5_SET(set_action_in, action, field, f->field);

		if (cmd == MLX5_ACTION_TYPE_SET) {
			MLX5_SET(set_action_in, action, offset, first);
			/* length is num of bits to be written, zero means length of 32 */
			MLX5_SET(set_action_in, action, length, (last - first + 1));
		}

		if (field_bsize == 32)
			MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
		else if (field_bsize == 16)
			MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
		else if (field_bsize == 8)
			MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);

		action += action_size;
		nactions++;
	}

	parse_attr->num_mod_hdr_actions = nactions;
	return 0;
}
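/* Worked example of the single-bit-run check above (illustrative masks): a
 * 16-bit mask of 0x0ff0 gives first = 4, next_z = 12, last = 11; since
 * next_z < last is false, the write is accepted as offset 4, length 8.  A
 * mask of 0x0f0f gives first = 0, next_z = 4, last = 11; first < next_z and
 * next_z < last both hold, meaning two separate runs of set bits, which a
 * single set_action_in cannot express, so the rewrite is refused.
 */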
static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
						  int namespace)
{
	if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
		return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
	else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
		return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
}

static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
				 struct pedit_headers_action *hdrs,
				 int namespace,
				 struct mlx5e_tc_flow_parse_attr *parse_attr)
{
	int nkeys, action_size, max_actions;

	nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits +
		hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits;
	action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);

	max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace);
	/* can get up to crazily 16 HW actions in 32 bits pedit SW key */
	max_actions = min(max_actions, nkeys * 16);

	parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
	if (!parse_attr->mod_hdr_actions)
		return -ENOMEM;

	parse_attr->max_mod_hdr_actions = max_actions;
	return 0;
}
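/* Sizing arithmetic spelled out (example numbers, not from the source):
 * every 32-bit pedit SW key can expand to at most 16 HW actions, so two
 * keys against firmware advertising 128 max_modify_header_actions allocate
 * min(128, 2 * 16) = 32 slots of
 * MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) bytes each.
 */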
static const struct pedit_headers zero_masks = {};

static int parse_tc_pedit_action(struct mlx5e_priv *priv,
				 const struct flow_action_entry *act, int namespace,
				 struct mlx5e_tc_flow_parse_attr *parse_attr,
				 struct pedit_headers_action *hdrs,
				 struct netlink_ext_ack *extack)
{
	u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
	int err = -EOPNOTSUPP;
	u32 mask, val, offset;
	u8 htype;

	htype = act->mangle.htype;
	err = -EOPNOTSUPP; /* can't be all optimistic */

	if (htype == FLOW_ACT_MANGLE_UNSPEC) {
		NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
		goto out_err;
	}

	if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "The pedit offload action is not supported");
		goto out_err;
	}

	mask = act->mangle.mask;
	val = act->mangle.val;
	offset = act->mangle.offset;

	err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
	if (err)
		goto out_err;

	hdrs[cmd].pedits++;

	return 0;
out_err:
	return err;
}

static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
				 struct mlx5e_tc_flow_parse_attr *parse_attr,
				 struct pedit_headers_action *hdrs,
				 u32 *action_flags,
				 struct netlink_ext_ack *extack)
{
	struct pedit_headers *cmd_masks;
	int err;
	u8 cmd;

	if (!parse_attr->mod_hdr_actions) {
		err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr);
		if (err)
			goto out_err;
	}

	err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack);
	if (err < 0)
		goto out_dealloc_parsed_actions;

	for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
		cmd_masks = &hdrs[cmd].masks;
		if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
			NL_SET_ERR_MSG_MOD(extack,
					   "attempt to offload an unsupported field");
			netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
			print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
				       16, 1, cmd_masks, sizeof(zero_masks), true);
			err = -EOPNOTSUPP;
			goto out_dealloc_parsed_actions;
		}
	}

	return 0;

out_dealloc_parsed_actions:
	kfree(parse_attr->mod_hdr_actions);
out_err:
	return err;
}

static bool csum_offload_supported(struct mlx5e_priv *priv,
				   u32 action,
				   u32 update_flags,
				   struct netlink_ext_ack *extack)
{
	u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
			 TCA_CSUM_UPDATE_FLAG_UDP;

	/* The HW recalcs checksums only if re-writing headers */
	if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "TC csum action is only offloaded with pedit");
		netdev_warn(priv->netdev,
			    "TC csum action is only offloaded with pedit\n");
		return false;
	}

	if (update_flags & ~prot_flags) {
		NL_SET_ERR_MSG_MOD(extack,
				   "can't offload TC csum action for some header/s");
		netdev_warn(priv->netdev,
			    "can't offload TC csum action for some header/s - flags %#x\n",
			    update_flags);
		return false;
	}

	return true;
}
struct ip_ttl_word {
	__u8	ttl;
	__u8	protocol;
	__sum16	check;
};

struct ipv6_hoplimit_word {
	__be16	payload_len;
	__u8	nexthdr;
	__u8	hop_limit;
};

static bool is_action_keys_supported(const struct flow_action_entry *act)
{
	u32 mask, offset;
	u8 htype;

	htype = act->mangle.htype;
	offset = act->mangle.offset;
	mask = ~act->mangle.mask;
	/* For IPv4 & IPv6 header check 4 byte word,
	 * to determine that modified fields
	 * are NOT ttl & hop_limit only.
	 */
	if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
		struct ip_ttl_word *ttl_word =
			(struct ip_ttl_word *)&mask;

		if (offset != offsetof(struct iphdr, ttl) ||
		    ttl_word->protocol ||
		    ttl_word->check) {
			return true;
		}
	} else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
		struct ipv6_hoplimit_word *hoplimit_word =
			(struct ipv6_hoplimit_word *)&mask;

		if (offset != offsetof(struct ipv6hdr, payload_len) ||
		    hoplimit_word->payload_len ||
		    hoplimit_word->nexthdr) {
			return true;
		}
	}

	return false;
}
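/* How the word overlay above works (illustration): the mangle mask is
 * inverted, so set bits in 'mask' are the bytes being rewritten.  Casting
 * &mask to struct ip_ttl_word lines ttl/protocol/check up with the 32-bit
 * word that starts at offsetof(struct iphdr, ttl); a TTL-only rewrite
 * therefore leaves protocol and check clear, and anything else counts as a
 * real IP header modification for the caller below.
 */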
static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
					  struct flow_action *flow_action,
					  u32 actions,
					  struct netlink_ext_ack *extack)
{
	const struct flow_action_entry *act;
	bool modify_ip_header;
	void *headers_v;
	u16 ethertype;
	u8 ip_proto;
	int i;

	headers_v = get_match_headers_value(actions, spec);
	ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);

	/* for non-IP we only re-write MACs, so we're okay */
	if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
		goto out_ok;

	modify_ip_header = false;
	flow_action_for_each(i, act, flow_action) {
		if (act->id != FLOW_ACTION_MANGLE &&
		    act->id != FLOW_ACTION_ADD)
			continue;

		if (is_action_keys_supported(act)) {
			modify_ip_header = true;
			break;
		}
	}

	ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
	if (modify_ip_header && ip_proto != IPPROTO_TCP &&
	    ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
		NL_SET_ERR_MSG_MOD(extack,
				   "can't offload re-write of non TCP/UDP");
		pr_info("can't offload re-write of ip proto %d\n", ip_proto);
		return false;
	}

out_ok:
	return true;
}

static bool actions_match_supported(struct mlx5e_priv *priv,
				    struct flow_action *flow_action,
				    struct mlx5e_tc_flow_parse_attr *parse_attr,
				    struct mlx5e_tc_flow *flow,
				    struct netlink_ext_ack *extack)
{
	u32 actions;

	if (mlx5e_is_eswitch_flow(flow))
		actions = flow->esw_attr->action;
	else
		actions = flow->nic_attr->action;

	if (flow_flag_test(flow, EGRESS) &&
	    !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
	      (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
	      (actions & MLX5_FLOW_CONTEXT_ACTION_DROP)))
		return false;

	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		return modify_header_match_supported(&parse_attr->spec,
						     flow_action, actions,
						     extack);

	return true;
}

static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
{
	struct mlx5_core_dev *fmdev, *pmdev;
	u64 fsystem_guid, psystem_guid;

	fmdev = priv->mdev;
	pmdev = peer_priv->mdev;

	fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
	psystem_guid = mlx5_query_nic_system_image_guid(pmdev);

	return (fsystem_guid == psystem_guid);
}
static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
				   const struct flow_action_entry *act,
				   struct mlx5e_tc_flow_parse_attr *parse_attr,
				   struct pedit_headers_action *hdrs,
				   u32 *action, struct netlink_ext_ack *extack)
{
	u16 mask16 = VLAN_VID_MASK;
	u16 val16 = act->vlan.vid & VLAN_VID_MASK;
	const struct flow_action_entry pedit_act = {
		.id = FLOW_ACTION_MANGLE,
		.mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
		.mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
		.mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
		.mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
	};
	u8 match_prio_mask, match_prio_val;
	void *headers_c, *headers_v;
	int err;

	headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
	headers_v = get_match_headers_value(*action, &parse_attr->spec);

	if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
	      MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
		NL_SET_ERR_MSG_MOD(extack,
				   "VLAN rewrite action must have VLAN protocol match");
		return -EOPNOTSUPP;
	}

	match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
	match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
	if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Changing VLAN prio is not supported");
		return -EOPNOTSUPP;
	}

	err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr,
				    hdrs, NULL);
	*action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	return err;
}

static int
add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
				 struct mlx5e_tc_flow_parse_attr *parse_attr,
				 struct pedit_headers_action *hdrs,
				 u32 *action, struct netlink_ext_ack *extack)
{
	const struct flow_action_entry prio_tag_act = {
		.vlan.vid = 0,
		.vlan.prio =
			MLX5_GET(fte_match_set_lyr_2_4,
				 get_match_headers_value(*action,
							 &parse_attr->spec),
				 first_prio) &
			MLX5_GET(fte_match_set_lyr_2_4,
				 get_match_headers_criteria(*action,
							    &parse_attr->spec),
				 first_prio),
	};

	return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
				       &prio_tag_act, parse_attr, hdrs, action,
				       extack);
}
static int parse_tc_nic_actions(struct mlx5e_priv *priv,
				struct flow_action *flow_action,
				struct mlx5e_tc_flow_parse_attr *parse_attr,
				struct mlx5e_tc_flow *flow,
				struct netlink_ext_ack *extack)
{
	struct mlx5_nic_flow_attr *attr = flow->nic_attr;
	struct pedit_headers_action hdrs[2] = {};
	const struct flow_action_entry *act;
	u32 action = 0;
	int err, i;

	if (!flow_action_has_entries(flow_action))
		return -EINVAL;

	attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_DROP:
			action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
			if (MLX5_CAP_FLOWTABLE(priv->mdev,
					       flow_table_properties_nic_receive.flow_counter))
				action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
			break;
		case FLOW_ACTION_MANGLE:
		case FLOW_ACTION_ADD:
			err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
						    parse_attr, hdrs, extack);
			if (err)
				return err;

			action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
				  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
			break;
		case FLOW_ACTION_VLAN_MANGLE:
			err = add_vlan_rewrite_action(priv,
						      MLX5_FLOW_NAMESPACE_KERNEL,
						      act, parse_attr, hdrs,
						      &action, extack);
			if (err)
				return err;

			break;
		case FLOW_ACTION_CSUM:
			if (csum_offload_supported(priv, action,
						   act->csum_flags,
						   extack))
				break;

			return -EOPNOTSUPP;
		case FLOW_ACTION_REDIRECT: {
			struct net_device *peer_dev = act->dev;

			if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
			    same_hw_devs(priv, netdev_priv(peer_dev))) {
				parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
				flow_flag_set(flow, HAIRPIN);
				action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
					  MLX5_FLOW_CONTEXT_ACTION_COUNT;
			} else {
				NL_SET_ERR_MSG_MOD(extack,
						   "device is not on same HW, can't offload");
				netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
					    peer_dev->name);
				return -EINVAL;
			}
			}
			break;
		case FLOW_ACTION_MARK: {
			u32 mark = act->mark;

			if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Bad flow mark - only 16 bit is supported");
				return -EINVAL;
			}

			attr->flow_tag = mark;
			action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
			}
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
			return -EOPNOTSUPP;
		}
	}

	if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
	    hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
		err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
					    parse_attr, hdrs, &action, extack);
		if (err)
			return err;
		/* in case all pedit actions are skipped, remove the MOD_HDR
		 * flag.
		 */
		if (parse_attr->num_mod_hdr_actions == 0) {
			action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
			kfree(parse_attr->mod_hdr_actions);
		}
	}

	attr->action = action;
	if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
		return -EOPNOTSUPP;

	return 0;
}
struct encap_key {
	const struct ip_tunnel_key *ip_tun_key;
	struct mlx5e_tc_tunnel *tc_tunnel;
};

static inline int cmp_encap_info(struct encap_key *a,
				 struct encap_key *b)
{
	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
	       a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
}

static inline int hash_encap_info(struct encap_key *key)
{
	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
		     key->tc_tunnel->tunnel_type);
}
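/* Usage sketch (hypothetical scenario): two filters encapsulating towards
 * the same tunnel endpoint produce identical ip_tun_key contents and the
 * same tc_tunnel->tunnel_type, so hash_encap_info() lands both in one
 * bucket and cmp_encap_info() returns 0, letting mlx5e_encap_get() below
 * share a single mlx5e_encap_entry instead of resolving the neighbour and
 * building the encap header twice.
 */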
static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
				  struct net_device *peer_netdev)
{
	struct mlx5e_priv *peer_priv;

	peer_priv = netdev_priv(peer_netdev);

	return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
		mlx5e_eswitch_rep(priv->netdev) &&
		mlx5e_eswitch_rep(peer_netdev) &&
		same_hw_devs(priv, peer_priv));
}

bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_entry *e;
	struct encap_key e_key;

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		e_key.ip_tun_key = &e->tun_info->key;
		e_key.tc_tunnel = e->tunnel;
		if (!cmp_encap_info(&e_key, key) &&
		    mlx5e_encap_take(e))
			return e;
	}

	return NULL;
}

static int mlx5e_attach_encap(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow,
			      struct net_device *mirred_dev,
			      int out_index,
			      struct netlink_ext_ack *extack,
			      struct net_device **encap_dev,
			      bool *encap_valid)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	const struct ip_tunnel_info *tun_info;
	struct encap_key key;
	struct mlx5e_encap_entry *e;
	unsigned short family;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);
	key.ip_tun_key = &tun_info->key;
	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
	if (!key.tc_tunnel) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(&key);

	mutex_lock(&esw->offloads.encap_tbl_lock);
	e = mlx5e_encap_get(priv, &key, hash_key);

	/* must verify if encap is valid or not */
	if (e) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		wait_for_completion(&e->res_ready);

		/* Protect against concurrent neigh update. */
		mutex_lock(&esw->offloads.encap_tbl_lock);
		if (e->compl_result < 0) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e) {
		err = -ENOMEM;
		goto out_err;
	}

	refcount_set(&e->refcnt, 1);
	init_completion(&e->res_ready);

	e->tun_info = tun_info;
	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
	if (err) {
		kfree(e);
		e = NULL;
		goto out_err;
	}

	INIT_LIST_HEAD(&e->flows);
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	if (family == AF_INET)
		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
	else if (family == AF_INET6)
		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

	/* Protect against concurrent neigh update. */
	mutex_lock(&esw->offloads.encap_tbl_lock);
	complete_all(&e->res_ready);
	if (err) {
		e->compl_result = err;
		goto out_err;
	}
	e->compl_result = 1;

attach_flow:
	flow->encaps[out_index].e = e;
	list_add(&flow->encaps[out_index].list, &e->flows);
	flow->encaps[out_index].index = out_index;
	*encap_dev = e->out_dev;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		attr->dests[out_index].pkt_reformat = e->pkt_reformat;
		attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
		*encap_valid = true;
	} else {
		*encap_valid = false;
	}
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	return err;

out_err:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	if (e)
		mlx5e_encap_put(priv, e);
	return err;
}
static int parse_tc_vlan_action(struct mlx5e_priv *priv,
				const struct flow_action_entry *act,
				struct mlx5_esw_flow_attr *attr,
				u32 *action)
{
	u8 vlan_idx = attr->total_vlan;

	if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
		return -EOPNOTSUPP;

	switch (act->id) {
	case FLOW_ACTION_VLAN_POP:
		if (vlan_idx) {
			if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
								 MLX5_FS_VLAN_DEPTH))
				return -EOPNOTSUPP;

			*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
		} else {
			*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
		}
		break;
	case FLOW_ACTION_VLAN_PUSH:
		attr->vlan_vid[vlan_idx] = act->vlan.vid;
		attr->vlan_prio[vlan_idx] = act->vlan.prio;
		attr->vlan_proto[vlan_idx] = act->vlan.proto;
		if (!attr->vlan_proto[vlan_idx])
			attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);

		if (vlan_idx) {
			if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
								 MLX5_FS_VLAN_DEPTH))
				return -EOPNOTSUPP;

			*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
		} else {
			if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
			    (act->vlan.proto != htons(ETH_P_8021Q) ||
			     act->vlan.prio))
				return -EOPNOTSUPP;

			*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
		}
		break;
	default:
		return -EINVAL;
	}

	attr->total_vlan = vlan_idx + 1;

	return 0;
}
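/* Mapping illustration (example hardware capabilities): the first pop or
 * push on a rule uses the plain VLAN_POP/VLAN_PUSH flow-context bits; a
 * second one (vlan_idx == 1, i.e. QinQ) needs the *_2 variants and is only
 * accepted when the device supports MLX5_FS_VLAN_DEPTH action levels.  On
 * hardware with a single action level, pushing anything other than a plain
 * 802.1Q tag with priority 0 is likewise refused.
 */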
static int add_vlan_push_action(struct mlx5e_priv *priv,
				struct mlx5_esw_flow_attr *attr,
				struct net_device **out_dev,
				u32 *action)
{
	struct net_device *vlan_dev = *out_dev;
	struct flow_action_entry vlan_act = {
		.id = FLOW_ACTION_VLAN_PUSH,
		.vlan.vid = vlan_dev_vlan_id(vlan_dev),
		.vlan.proto = vlan_dev_vlan_proto(vlan_dev),
		.vlan.prio = 0,
	};
	int err;

	err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
	if (err)
		return err;

	*out_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
					dev_get_iflink(vlan_dev));
	if (is_vlan_dev(*out_dev))
		err = add_vlan_push_action(priv, attr, out_dev, action);

	return err;
}

static int add_vlan_pop_action(struct mlx5e_priv *priv,
			       struct mlx5_esw_flow_attr *attr,
			       u32 *action)
{
	int nest_level = vlan_get_encap_level(attr->parse_attr->filter_dev);
	struct flow_action_entry vlan_act = {
		.id = FLOW_ACTION_VLAN_POP,
	};
	int err = 0;

	while (nest_level--) {
		err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
		if (err)
			return err;
	}

	return err;
}

bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
				    struct net_device *out_dev)
{
	if (is_merged_eswitch_dev(priv, out_dev))
		return true;

	return mlx5e_eswitch_rep(out_dev) &&
	       same_hw_devs(priv, netdev_priv(out_dev));
}
static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
				struct flow_action *flow_action,
				struct mlx5e_tc_flow *flow,
				struct netlink_ext_ack *extack)
{
	struct pedit_headers_action hdrs[2] = {};
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
	struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	const struct ip_tunnel_info *info = NULL;
	const struct flow_action_entry *act;
	bool encap = false;
	u32 action = 0;
	int err, i;

	if (!flow_action_has_entries(flow_action))
		return -EINVAL;

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_DROP:
			action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
				  MLX5_FLOW_CONTEXT_ACTION_COUNT;
			break;
		case FLOW_ACTION_MANGLE:
		case FLOW_ACTION_ADD:
			err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
						    parse_attr, hdrs, extack);
			if (err)
				return err;

			action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
			attr->split_count = attr->out_count;
			break;
		case FLOW_ACTION_CSUM:
			if (csum_offload_supported(priv, action,
						   act->csum_flags, extack))
				break;

			return -EOPNOTSUPP;
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED: {
			struct mlx5e_priv *out_priv;
			struct net_device *out_dev;

			out_dev = act->dev;
			if (!out_dev) {
				/* out_dev is NULL when filters with
				 * non-existing mirred device are replayed to
				 * the driver.
				 */
				return -EINVAL;
			}

			if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
				NL_SET_ERR_MSG_MOD(extack,
						   "can't support more output ports, can't offload forwarding");
				pr_err("can't support more than %d output ports, can't offload forwarding\n",
				       attr->out_count);
				return -EOPNOTSUPP;
			}

			action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
				  MLX5_FLOW_CONTEXT_ACTION_COUNT;
			if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
				struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
				struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
				struct net_device *uplink_upper;

				rcu_read_lock();
				uplink_upper =
					netdev_master_upper_dev_get_rcu(uplink_dev);
				if (uplink_upper &&
				    netif_is_lag_master(uplink_upper) &&
				    uplink_upper == out_dev)
					out_dev = uplink_dev;
				rcu_read_unlock();

				if (is_vlan_dev(out_dev)) {
					err = add_vlan_push_action(priv, attr,
								   &out_dev,
								   &action);
					if (err)
						return err;
				}

				if (is_vlan_dev(parse_attr->filter_dev)) {
					err = add_vlan_pop_action(priv, attr,
								  &action);
					if (err)
						return err;
				}

				if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
					NL_SET_ERR_MSG_MOD(extack,
							   "devices are not on same switch HW, can't offload forwarding");
					pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
					       priv->netdev->name, out_dev->name);
					return -EOPNOTSUPP;
				}

				out_priv = netdev_priv(out_dev);
				rpriv = out_priv->ppriv;
				attr->dests[attr->out_count].rep = rpriv->rep;
				attr->dests[attr->out_count].mdev = out_priv->mdev;
				attr->out_count++;
			} else if (encap) {
				parse_attr->mirred_ifindex[attr->out_count] =
					out_dev->ifindex;
				parse_attr->tun_info[attr->out_count] = info;
				encap = false;
				attr->dests[attr->out_count].flags |=
					MLX5_ESW_DEST_ENCAP;
				attr->out_count++;
				/* attr->dests[].rep is resolved when we
				 * handle encap
				 */
			} else if (parse_attr->filter_dev != priv->netdev) {
				/* All mlx5 devices are called to configure
				 * high level device filters. Therefore, the
				 * *attempt* to install a filter on an invalid
				 * eswitch should not trigger an explicit error
				 */
				return -EINVAL;
			} else {
				NL_SET_ERR_MSG_MOD(extack,
						   "devices are not on same switch HW, can't offload forwarding");
				pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
				       priv->netdev->name, out_dev->name);
				return -EINVAL;
			}
			}
			break;
		case FLOW_ACTION_TUNNEL_ENCAP:
			info = act->tunnel;
			if (info)
				encap = true;
			else
				return -EOPNOTSUPP;

			break;
		case FLOW_ACTION_VLAN_PUSH:
		case FLOW_ACTION_VLAN_POP:
			if (act->id == FLOW_ACTION_VLAN_PUSH &&
			    (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
				/* Replace vlan pop+push with vlan modify */
				action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
				err = add_vlan_rewrite_action(priv,
							      MLX5_FLOW_NAMESPACE_FDB,
							      act, parse_attr, hdrs,
							      &action, extack);
			} else {
				err = parse_tc_vlan_action(priv, act, attr, &action);
			}
			if (err)
				return err;

			attr->split_count = attr->out_count;
			break;
		case FLOW_ACTION_VLAN_MANGLE:
			err = add_vlan_rewrite_action(priv,
						      MLX5_FLOW_NAMESPACE_FDB,
						      act, parse_attr, hdrs,
						      &action, extack);
			if (err)
				return err;

			attr->split_count = attr->out_count;
			break;
		case FLOW_ACTION_TUNNEL_DECAP:
			action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
			break;
		case FLOW_ACTION_GOTO: {
			u32 dest_chain = act->chain_index;
			u32 max_chain = mlx5_eswitch_get_chain_range(esw);

			if (dest_chain <= attr->chain) {
				NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported");
				return -EOPNOTSUPP;
			}
			if (dest_chain > max_chain) {
				NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range");
				return -EOPNOTSUPP;
			}
			action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
			attr->dest_chain = dest_chain;
			break;
			}
		default:
			NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
			return -EOPNOTSUPP;
		}
	}

	if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
	    action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
		/* For prio tag mode, replace the vlan pop with a vlan prio
		 * tag rewrite.
		 */
		action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
		err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
						       &action, extack);
		if (err)
			return err;
	}

	if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
	    hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
		err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
					    parse_attr, hdrs, &action, extack);
		if (err)
			return err;
		/* in case all pedit actions are skipped, remove the MOD_HDR
		 * flag. we might have set split_count either by pedit or
		 * pop/push. if there is no pop/push either, reset it too.
		 */
		if (parse_attr->num_mod_hdr_actions == 0) {
			action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
			kfree(parse_attr->mod_hdr_actions);
			if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
			      (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
				attr->split_count = 0;
		}
	}

	attr->action = action;
	if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
		return -EOPNOTSUPP;

	if (attr->dest_chain) {
		if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
			NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
			return -EOPNOTSUPP;
		}
		attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	}

	if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "current firmware doesn't support split rule for port mirroring");
		netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
		return -EOPNOTSUPP;
	}

	return 0;
}
static void get_flags(int flags, unsigned long *flow_flags)
{
	unsigned long __flow_flags = 0;

	if (flags & MLX5_TC_FLAG(INGRESS))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
	if (flags & MLX5_TC_FLAG(EGRESS))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);

	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
	if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);

	*flow_flags = __flow_flags;
}
static const struct rhashtable_params tc_ht_params = {
	.head_offset = offsetof(struct mlx5e_tc_flow, node),
	.key_offset = offsetof(struct mlx5e_tc_flow, cookie),
	.key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
	.automatic_shrinking = true,
};
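/* The table is keyed by the TC filter cookie embedded in struct
 * mlx5e_tc_flow, so resolving an offload request is a single lookup
 * (illustrative call, mirroring the users further below):
 *
 *	flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
 */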
static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
				    unsigned long flags)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		return &uplink_rpriv->uplink_priv.tc_ht;
	} else /* NIC offload */
		return &priv->fs.tc.ht;
}

static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
{
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
	bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
			      flow_flag_test(flow, INGRESS);
	bool act_is_encap = !!(attr->action &
			       MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
	bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
						MLX5_DEVCOM_ESW_OFFLOADS);

	if (!esw_paired)
		return false;

	if ((mlx5_lag_is_sriov(attr->in_mdev) ||
	     mlx5_lag_is_multipath(attr->in_mdev)) &&
	    (is_rep_ingress || act_is_encap))
		return true;

	return false;
}
static int
mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
		 struct flow_cls_offload *f, unsigned long flow_flags,
		 struct mlx5e_tc_flow_parse_attr **__parse_attr,
		 struct mlx5e_tc_flow **__flow)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_tc_flow *flow;
	int out_index, err;

	flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
	parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
	if (!parse_attr || !flow) {
		err = -ENOMEM;
		goto err_free;
	}

	flow->cookie = f->cookie;
	flow->flags = flow_flags;
	flow->priv = priv;
	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
		INIT_LIST_HEAD(&flow->encaps[out_index].list);
	INIT_LIST_HEAD(&flow->mod_hdr);
	INIT_LIST_HEAD(&flow->hairpin);
	refcount_set(&flow->refcnt, 1);
	init_completion(&flow->init_done);

	*__flow = flow;
	*__parse_attr = parse_attr;

	return 0;

err_free:
	kfree(flow);
	kvfree(parse_attr);
	return err;
}
static void
mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
			 struct mlx5e_priv *priv,
			 struct mlx5e_tc_flow_parse_attr *parse_attr,
			 struct flow_cls_offload *f,
			 struct mlx5_eswitch_rep *in_rep,
			 struct mlx5_core_dev *in_mdev)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	esw_attr->parse_attr = parse_attr;
	esw_attr->chain = f->common.chain_index;
	esw_attr->prio = f->common.prio;

	esw_attr->in_rep = in_rep;
	esw_attr->in_mdev = in_mdev;

	if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
	    MLX5_COUNTER_SOURCE_ESWITCH)
		esw_attr->counter_dev = in_mdev;
	else
		esw_attr->counter_dev = priv->mdev;
}
static struct mlx5e_tc_flow *
__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
		     struct flow_cls_offload *f,
		     unsigned long flow_flags,
		     struct net_device *filter_dev,
		     struct mlx5_eswitch_rep *in_rep,
		     struct mlx5_core_dev *in_mdev)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_tc_flow *flow;
	int attr_size, err;

	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
	attr_size  = sizeof(struct mlx5_esw_flow_attr);
	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
			       &parse_attr, &flow);
	if (err)
		goto out;

	parse_attr->filter_dev = filter_dev;
	mlx5e_flow_esw_attr_init(flow->esw_attr,
				 priv, parse_attr,
				 f, in_rep, in_mdev);

	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
			       f, filter_dev);
	if (err)
		goto err_free;

	err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
	if (err)
		goto err_free;

	err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
	complete_all(&flow->init_done);
	if (err) {
		if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
			goto err_free;

		add_unready_flow(flow);
	}

	return flow;

err_free:
	mlx5e_flow_put(priv, flow);
out:
	return ERR_PTR(err);
}
static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
				      struct mlx5e_tc_flow *flow,
				      unsigned long flow_flags)
{
	struct mlx5e_priv *priv = flow->priv, *peer_priv;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_rep_priv *peer_urpriv;
	struct mlx5e_tc_flow *peer_flow;
	struct mlx5_core_dev *in_mdev;
	int err = 0;

	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		return -ENODEV;

	peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
	peer_priv = netdev_priv(peer_urpriv->netdev);

	/* in_mdev is assigned of which the packet originated from.
	 * So packets redirected to uplink use the same mdev of the
	 * original flow and packets redirected from uplink use the
	 * peer mdev.
	 */
	if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK)
		in_mdev = peer_priv->mdev;
	else
		in_mdev = priv->mdev;

	parse_attr = flow->esw_attr->parse_attr;
	peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
					 parse_attr->filter_dev,
					 flow->esw_attr->in_rep, in_mdev);
	if (IS_ERR(peer_flow)) {
		err = PTR_ERR(peer_flow);
		goto out;
	}

	flow->peer_flow = peer_flow;
	flow_flag_set(flow, DUP);
	mutex_lock(&esw->offloads.peer_mutex);
	list_add_tail(&flow->peer, &esw->offloads.peer_flows);
	mutex_unlock(&esw->offloads.peer_mutex);

out:
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	return err;
}
static int
mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
		   struct flow_cls_offload *f,
		   unsigned long flow_flags,
		   struct net_device *filter_dev,
		   struct mlx5e_tc_flow **__flow)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch_rep *in_rep = rpriv->rep;
	struct mlx5_core_dev *in_mdev = priv->mdev;
	struct mlx5e_tc_flow *flow;
	int err;

	flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
				    in_mdev);
	if (IS_ERR(flow))
		return PTR_ERR(flow);

	if (is_peer_flow_needed(flow)) {
		err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
		if (err) {
			mlx5e_tc_del_fdb_flow(priv, flow);
			goto out;
		}
	}

	*__flow = flow;

	return 0;

out:
	return err;
}
static int
mlx5e_add_nic_flow(struct mlx5e_priv *priv,
		   struct flow_cls_offload *f,
		   unsigned long flow_flags,
		   struct net_device *filter_dev,
		   struct mlx5e_tc_flow **__flow)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_tc_flow *flow;
	int attr_size, err;

	/* multi-chain not supported for NIC rules */
	if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
		return -EOPNOTSUPP;

	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
	attr_size  = sizeof(struct mlx5_nic_flow_attr);
	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
			       &parse_attr, &flow);
	if (err)
		goto out;

	parse_attr->filter_dev = filter_dev;
	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
			       f, filter_dev);
	if (err)
		goto err_free;

	err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
	if (err)
		goto err_free;

	err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
	if (err)
		goto err_free;

	flow_flag_set(flow, OFFLOADED);
	kvfree(parse_attr);
	*__flow = flow;

	return 0;

err_free:
	mlx5e_flow_put(priv, flow);
	kvfree(parse_attr);
out:
	return err;
}
static int
mlx5e_tc_add_flow(struct mlx5e_priv *priv,
		  struct flow_cls_offload *f,
		  unsigned long flags,
		  struct net_device *filter_dev,
		  struct mlx5e_tc_flow **flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long flow_flags;
	int err;

	get_flags(flags, &flow_flags);

	if (!tc_can_offload_extack(priv->netdev, f->common.extack))
		return -EOPNOTSUPP;

	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
		err = mlx5e_add_fdb_flow(priv, f, flow_flags,
					 filter_dev, flow);
	else
		err = mlx5e_add_nic_flow(priv, f, flow_flags,
					 filter_dev, flow);

	return err;
}
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
			   struct flow_cls_offload *f, unsigned long flags)
{
	struct netlink_ext_ack *extack = f->common.extack;
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5e_tc_flow *flow;
	int err = 0;

	rcu_read_lock();
	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
	rcu_read_unlock();
	if (flow) {
		NL_SET_ERR_MSG_MOD(extack,
				   "flow cookie already exists, ignoring");
		netdev_warn_once(priv->netdev,
				 "flow cookie %lx already exists, ignoring\n",
				 f->cookie);
		err = -EEXIST;
		goto out;
	}

	trace_mlx5e_configure_flower(f);
	err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
	if (err)
		goto out;

	err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
	if (err)
		goto err_free;

	return 0;

err_free:
	mlx5e_flow_put(priv, flow);
out:
	return err;
}
*flow
, int flags
)
3809 bool dir_ingress
= !!(flags
& MLX5_TC_FLAG(INGRESS
));
3810 bool dir_egress
= !!(flags
& MLX5_TC_FLAG(EGRESS
));
3812 return flow_flag_test(flow
, INGRESS
) == dir_ingress
&&
3813 flow_flag_test(flow
, EGRESS
) == dir_egress
;
int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
			struct flow_cls_offload *f, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5e_tc_flow *flow;
	int err;

	rcu_read_lock();
	flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
	if (!flow || !same_flow_direction(flow, flags)) {
		err = -EINVAL;
		goto errout;
	}

	/* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
	 * set.
	 */
	if (flow_flag_test_and_set(flow, DELETED)) {
		err = -EINVAL;
		goto errout;
	}
	rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
	rcu_read_unlock();

	trace_mlx5e_delete_flower(f);
	mlx5e_flow_put(priv, flow);

	return 0;

errout:
	rcu_read_unlock();
	return err;
}
int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
		       struct flow_cls_offload *f, unsigned long flags)
{
	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5_eswitch *peer_esw;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	u64 lastuse = 0;
	u64 packets = 0;
	u64 bytes = 0;
	int err = 0;

	rcu_read_lock();
	flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
						tc_ht_params));
	rcu_read_unlock();
	if (IS_ERR(flow))
		return PTR_ERR(flow);

	if (!same_flow_direction(flow, flags)) {
		err = -EINVAL;
		goto errout;
	}

	if (mlx5e_is_offloaded_flow(flow)) {
		counter = mlx5e_tc_get_counter(flow);
		if (!counter)
			goto errout;

		mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
	}

	/* Under multipath it's possible for one rule to be currently
	 * un-offloaded while the other rule is offloaded.
	 */
	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		goto out;

	if (flow_flag_test(flow, DUP) &&
	    flow_flag_test(flow->peer_flow, OFFLOADED)) {
		u64 bytes2;
		u64 packets2;
		u64 lastuse2;

		counter = mlx5e_tc_get_counter(flow->peer_flow);
		if (!counter)
			goto no_peer_counter;
		mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);

		bytes += bytes2;
		packets += packets2;
		lastuse = max_t(u64, lastuse, lastuse2);
	}

no_peer_counter:
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
out:
	flow_stats_update(&f->stats, bytes, packets, lastuse);
	trace_mlx5e_stats_flower(f);
errout:
	mlx5e_flow_put(priv, flow);
	return err;
}
static int apply_police_params(struct mlx5e_priv *priv, u32 rate,
			       struct netlink_ext_ack *extack)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch *esw;
	u16 vport_num;
	u32 rate_mbps;
	int err;

	esw = priv->mdev->priv.eswitch;
	/* rate is given in bytes/sec.
	 * First convert to bits/sec and then round to the nearest Mbit/sec.
	 * mbit means million bits.
	 * Moreover, if rate is non zero we choose to configure to a minimum of
	 * 1 mbit/sec.
	 */
	rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0;
	vport_num = rpriv->rep->vport;

	err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");

	return err;
}
static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
					struct flow_action *flow_action,
					struct netlink_ext_ack *extack)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	const struct flow_action_entry *act;
	int err;
	int i;

	if (!flow_action_has_entries(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
		return -EINVAL;
	}

	if (!flow_offload_has_one_action(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "matchall policing supports only a single action");
		return -EOPNOTSUPP;
	}

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_POLICE:
			err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
			if (err)
				return err;

			rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
				struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;
	int prio = TC_H_MAJ(ma->common.prio) >> 16;

	if (prio != 1) {
		NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
		return -EINVAL;
	}

	return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
}

int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;

	return apply_police_params(priv, 0, extack);
}
void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct rtnl_link_stats64 cur_stats;
	u64 dbytes;
	u64 dpkts;

	cur_stats = priv->stats.vf_vport;
	dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
	dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
	rpriv->prev_vf_vport_stats = cur_stats;
	/* flow_stats_update() takes bytes before packets */
	flow_stats_update(&ma->stats, dbytes, dpkts, jiffies);
}
static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
					      struct mlx5e_priv *peer_priv)
{
	struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
	struct mlx5e_hairpin_entry *hpe, *tmp;
	LIST_HEAD(init_wait_list);
	u16 peer_vhca_id;
	int bkt;

	if (!same_hw_devs(priv, peer_priv))
		return;

	peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);

	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
	hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
		if (refcount_inc_not_zero(&hpe->refcnt))
			list_add(&hpe->dead_peer_wait_list, &init_wait_list);
	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

	list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
		wait_for_completion(&hpe->res_ready);
		if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
			hpe->hp->pair->peer_gone = true;

		mlx5e_hairpin_put(priv, hpe);
	}
}

static int mlx5e_tc_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct mlx5e_flow_steering *fs;
	struct mlx5e_priv *peer_priv;
	struct mlx5e_tc_table *tc;
	struct mlx5e_priv *priv;

	if (ndev->netdev_ops != &mlx5e_netdev_ops ||
	    event != NETDEV_UNREGISTER ||
	    ndev->reg_state == NETREG_REGISTERED)
		return NOTIFY_DONE;

	tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
	fs = container_of(tc, struct mlx5e_flow_steering, tc);
	priv = container_of(fs, struct mlx5e_priv, fs);
	peer_priv = netdev_priv(ndev);
	if (priv == peer_priv ||
	    !(priv->netdev->features & NETIF_F_HW_TC))
		return NOTIFY_DONE;

	mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);

	return NOTIFY_DONE;
}
int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	int err;

	mutex_init(&tc->t_lock);
	mutex_init(&tc->mod_hdr.lock);
	hash_init(tc->mod_hdr.hlist);
	mutex_init(&tc->hairpin_tbl_lock);
	hash_init(tc->hairpin_tbl);

	err = rhashtable_init(&tc->ht, &tc_ht_params);
	if (err)
		return err;

	tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
	if (register_netdevice_notifier(&tc->netdevice_nb)) {
		tc->netdevice_nb.notifier_call = NULL;
		mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
	}

	return err;
}

static void _mlx5e_tc_del_flow(void *ptr, void *arg)
{
	struct mlx5e_tc_flow *flow = ptr;
	struct mlx5e_priv *priv = flow->priv;

	mlx5e_tc_del_flow(priv, flow);
	kfree(flow);
}

void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	if (tc->netdevice_nb.notifier_call)
		unregister_netdevice_notifier(&tc->netdevice_nb);

	mutex_destroy(&tc->mod_hdr.lock);
	mutex_destroy(&tc->hairpin_tbl_lock);

	rhashtable_destroy(&tc->ht);

	if (!IS_ERR_OR_NULL(tc->t)) {
		mlx5_destroy_flow_table(tc->t);
		tc->t = NULL;
	}
	mutex_destroy(&tc->t_lock);
}
int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
{
	return rhashtable_init(tc_ht, &tc_ht_params);
}

void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
{
	rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
}

int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);

	return atomic_read(&tc_ht->nelems);
}

void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
{
	struct mlx5e_tc_flow *flow, *tmp;

	list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
		__mlx5e_tc_del_fdb_peer_flow(flow);
}

void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
{
	struct mlx5_rep_uplink_priv *rpriv =
		container_of(work, struct mlx5_rep_uplink_priv,
			     reoffload_flows_work);
	struct mlx5e_tc_flow *flow, *tmp;

	mutex_lock(&rpriv->unready_flows_lock);
	list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
		if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
			unready_flow_del(flow);
	}
	mutex_unlock(&rpriv->unready_flows_lock);
}