/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <net/flow_dissector.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
#include <net/ipv6_stubs.h>
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
#include "eswitch.h"
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "diag/en_tc_tracepoint.h"
#include <asm/div64.h>
struct mlx5_nic_flow_attr {
	u32 action;
	u32 flow_tag;
	struct mlx5_modify_hdr *modify_hdr;
	u32 hairpin_tirn;
	u8 match_level;
	struct mlx5_flow_table	*hairpin_ft;
	struct mlx5_fc		*counter;
};
#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)

enum {
	MLX5E_TC_FLOW_FLAG_INGRESS	= MLX5E_TC_FLAG_INGRESS_BIT,
	MLX5E_TC_FLOW_FLAG_EGRESS	= MLX5E_TC_FLAG_EGRESS_BIT,
	MLX5E_TC_FLOW_FLAG_ESWITCH	= MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
	MLX5E_TC_FLOW_FLAG_NIC		= MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
	MLX5E_TC_FLOW_FLAG_OFFLOADED	= MLX5E_TC_FLOW_BASE,
	MLX5E_TC_FLOW_FLAG_HAIRPIN	= MLX5E_TC_FLOW_BASE + 1,
	MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS	= MLX5E_TC_FLOW_BASE + 2,
	MLX5E_TC_FLOW_FLAG_SLOW		= MLX5E_TC_FLOW_BASE + 3,
	MLX5E_TC_FLOW_FLAG_DUP		= MLX5E_TC_FLOW_BASE + 4,
	MLX5E_TC_FLOW_FLAG_NOT_READY	= MLX5E_TC_FLOW_BASE + 5,
	MLX5E_TC_FLOW_FLAG_DELETED	= MLX5E_TC_FLOW_BASE + 6,
};

#define MLX5E_TC_MAX_SPLITS 1
/* Helper struct for accessing a struct containing list_head array.
 * Containing struct
 *   |- Helper array
 *      [0] Helper item 0
 *          |- list_head item 0
 *          |- index (0)
 *      [1] Helper item 1
 *          |- list_head item 1
 *          |- index (1)
 * To access the containing struct from one of the list_head items:
 * 1. Get the helper item from the list_head item using
 *    container_of(list_head item, helper struct type, list_head field)
 * 2. Get the containing struct from the helper item and its index in the array:
 *    containing struct =
 *        container_of(helper item, containing struct type, helper field[index])
 */
struct encap_flow_item {
	struct mlx5e_encap_entry *e; /* attached encap instance */
	struct list_head list;
	int index;
};
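
/* Illustrative sketch (not part of the driver): recovering the containing
 * flow from a list_head embedded in encaps[i] follows the two steps above;
 * the variable names here are hypothetical.
 *
 *	struct encap_flow_item *efi;
 *	struct mlx5e_tc_flow *flow;
 *
 *	efi = container_of(list_head_item, struct encap_flow_item, list);
 *	flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
 *
 * mlx5e_take_all_encap_flows() below uses exactly this pattern.
 */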
struct mlx5e_tc_flow {
	struct rhash_head	node;
	struct mlx5e_priv	*priv;
	u64			cookie;
	unsigned long		flags;
	struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
	/* Flow can be associated with multiple encap IDs.
	 * The number of encaps is bounded by the number of supported
	 * destinations.
	 */
	struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
	struct mlx5e_tc_flow    *peer_flow;
	struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */
	struct list_head	mod_hdr; /* flows sharing the same mod hdr ID */
	struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
	struct list_head	hairpin; /* flows sharing the same hairpin */
	struct list_head	peer;    /* flows with peer flow */
	struct list_head	unready; /* flows not ready to be offloaded (e.g. due to missing route) */
	int			tmp_efi_index;
	struct list_head	tmp_list; /* temporary flow list used by neigh update */
	refcount_t		refcnt;
	struct rcu_head		rcu_head;
	struct completion	init_done;
	union {
		struct mlx5_esw_flow_attr esw_attr[0];
		struct mlx5_nic_flow_attr nic_attr[0];
	};
};
struct mlx5e_tc_flow_parse_attr {
	const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
	struct net_device *filter_dev;
	struct mlx5_flow_spec spec;
	int num_mod_hdr_actions;
	int max_mod_hdr_actions;
	void *mod_hdr_actions;
	int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
};
#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
struct mlx5e_hairpin {
	struct mlx5_hairpin *pair;

	struct mlx5_core_dev *func_mdev;
	struct mlx5e_priv *func_priv;
	u32 num_channels;

	u32 tdn;
	u32 tirn;

	struct mlx5e_rqt indir_rqt;
	u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
	struct mlx5e_ttc_table ttc;
};
struct mlx5e_hairpin_entry {
	/* a node of a hash table which keeps all the hairpin entries */
	struct hlist_node hairpin_hlist;

	/* protects flows list */
	spinlock_t flows_lock;
	/* flows sharing the same hairpin */
	struct list_head flows;
	/* hpe's that were not fully initialized when dead peer update event
	 * function traversed them.
	 */
	struct list_head dead_peer_wait_list;

	u16 peer_vhca_id;
	u8 prio;
	struct mlx5e_hairpin *hp;
	refcount_t refcnt;
	struct completion res_ready;
};
struct mod_hdr_key {
	int num_actions;
	void *actions;
};

struct mlx5e_mod_hdr_entry {
	/* a node of a hash table which keeps all the mod_hdr entries */
	struct hlist_node mod_hdr_hlist;

	/* protects flows list */
	spinlock_t flows_lock;
	/* flows sharing the same mod_hdr entry */
	struct list_head flows;

	struct mod_hdr_key key;

	struct mlx5_modify_hdr *modify_hdr;

	refcount_t refcnt;
	struct completion res_ready;
	int compl_result;
};

#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow);
static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
{
	if (!flow || !refcount_inc_not_zero(&flow->refcnt))
		return ERR_PTR(-EINVAL);
	return flow;
}
static void mlx5e_flow_put(struct mlx5e_priv *priv,
			   struct mlx5e_tc_flow *flow)
{
	if (refcount_dec_and_test(&flow->refcnt)) {
		mlx5e_tc_del_flow(priv, flow);
		kfree_rcu(flow, rcu_head);
	}
}
static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
{
	/* Complete all memory stores before setting bit. */
	smp_mb__before_atomic();
	set_bit(flag, &flow->flags);
}

#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
				     unsigned long flag)
{
	/* test_and_set_bit() provides all necessary barriers */
	return test_and_set_bit(flag, &flow->flags);
}

#define flow_flag_test_and_set(flow, flag)			\
	__flow_flag_test_and_set(flow,				\
				 MLX5E_TC_FLOW_FLAG_##flag)
static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
{
	/* Complete all memory stores before clearing bit. */
	smp_mb__before_atomic();
	clear_bit(flag, &flow->flags);
}

#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
						      MLX5E_TC_FLOW_FLAG_##flag)
static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
{
	bool ret = test_bit(flag, &flow->flags);

	/* Read fields of flow structure only after checking flags. */
	smp_mb__after_atomic();
	return ret;
}

#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
						    MLX5E_TC_FLOW_FLAG_##flag)
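
/* Usage sketch: the flow_flag_* wrappers take the short flag name and paste
 * it onto the full enum constant, e.g.
 *
 *	flow_flag_set(flow, OFFLOADED);
 *	expands to __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_OFFLOADED);
 *
 *	flow_flag_test(flow, SLOW)
 *	expands to __flow_flag_test(flow, MLX5E_TC_FLOW_FLAG_SLOW)
 *
 * The barriers in the helpers make a flag a publication point: fields stored
 * before the flag was set may safely be read after the flag tests true.
 */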
static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, ESWITCH);
}

static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, OFFLOADED);
}
static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
{
	return jhash(key->actions,
		     key->num_actions * MLX5_MH_ACT_SZ, 0);
}
static inline int cmp_mod_hdr_info(struct mod_hdr_key *a,
				   struct mod_hdr_key *b)
{
	if (a->num_actions != b->num_actions)
		return 1;

	return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
}
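
/* Mod header entries are deduplicated by hashing the raw action array: two
 * flows whose header-rewrite actions serialize to identical bytes land in
 * the same bucket (hash_mod_hdr_info) and compare equal (cmp_mod_hdr_info),
 * so they share one mlx5e_mod_hdr_entry and one device modify_hdr object.
 * For example, two rules that both rewrite the IPv4 TTL with the same
 * parameters reuse a single modify header context instead of allocating two.
 */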
static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, int namespace)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr :
		&priv->fs.tc.mod_hdr;
}
static struct mlx5e_mod_hdr_entry *
mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key)
{
	struct mlx5e_mod_hdr_entry *mh, *found = NULL;

	hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
		if (!cmp_mod_hdr_info(&mh->key, key)) {
			refcount_inc(&mh->refcnt);
			found = mh;
			break;
		}
	}

	return found;
}
static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
			      struct mlx5e_mod_hdr_entry *mh,
			      int namespace)
{
	struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace);

	if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
		return;
	hash_del(&mh->mod_hdr_hlist);
	mutex_unlock(&tbl->lock);

	WARN_ON(!list_empty(&mh->flows));
	if (mh->compl_result > 0)
		mlx5_modify_header_dealloc(priv->mdev, mh->modify_hdr);

	kfree(mh);
}
static int get_flow_name_space(struct mlx5e_tc_flow *flow)
{
	return mlx5e_is_eswitch_flow(flow) ?
		MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
}
static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
				struct mlx5e_tc_flow *flow,
				struct mlx5e_tc_flow_parse_attr *parse_attr)
{
	int num_actions, actions_size, namespace, err;
	struct mlx5e_mod_hdr_entry *mh;
	struct mod_hdr_tbl *tbl;
	struct mod_hdr_key key;
	u32 hash_key;

	num_actions  = parse_attr->num_mod_hdr_actions;
	actions_size = MLX5_MH_ACT_SZ * num_actions;

	key.actions = parse_attr->mod_hdr_actions;
	key.num_actions = num_actions;

	hash_key = hash_mod_hdr_info(&key);

	namespace = get_flow_name_space(flow);
	tbl = get_mod_hdr_table(priv, namespace);

	mutex_lock(&tbl->lock);
	mh = mlx5e_mod_hdr_get(tbl, &key, hash_key);
	if (mh) {
		mutex_unlock(&tbl->lock);
		wait_for_completion(&mh->res_ready);

		if (mh->compl_result < 0) {
			err = -EREMOTEIO;
			goto attach_header_err;
		}
		goto attach_flow;
	}

	mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
	if (!mh) {
		mutex_unlock(&tbl->lock);
		return -ENOMEM;
	}

	mh->key.actions = (void *)mh + sizeof(*mh);
	memcpy(mh->key.actions, key.actions, actions_size);
	mh->key.num_actions = num_actions;
	spin_lock_init(&mh->flows_lock);
	INIT_LIST_HEAD(&mh->flows);
	refcount_set(&mh->refcnt, 1);
	init_completion(&mh->res_ready);

	hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
	mutex_unlock(&tbl->lock);

	mh->modify_hdr = mlx5_modify_header_alloc(priv->mdev, namespace,
						  mh->key.num_actions,
						  mh->key.actions);
	if (IS_ERR(mh->modify_hdr)) {
		err = PTR_ERR(mh->modify_hdr);
		mh->compl_result = err;
		goto alloc_header_err;
	}
	mh->compl_result = 1;
	complete_all(&mh->res_ready);

attach_flow:
	flow->mh = mh;
	spin_lock(&mh->flows_lock);
	list_add(&flow->mod_hdr, &mh->flows);
	spin_unlock(&mh->flows_lock);
	if (mlx5e_is_eswitch_flow(flow))
		flow->esw_attr->modify_hdr = mh->modify_hdr;
	else
		flow->nic_attr->modify_hdr = mh->modify_hdr;

	return 0;

alloc_header_err:
	complete_all(&mh->res_ready);
attach_header_err:
	mlx5e_mod_hdr_put(priv, mh, namespace);
	return err;
}
static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
				 struct mlx5e_tc_flow *flow)
{
	/* flow wasn't fully initialized */
	if (!flow->mh)
		return;

	spin_lock(&flow->mh->flows_lock);
	list_del(&flow->mod_hdr);
	spin_unlock(&flow->mh->flows_lock);

	mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow));
	flow->mh = NULL;
}
static
struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
	struct mlx5_core_dev *mdev;
	struct net_device *netdev;
	struct mlx5e_priv *priv;

	netdev = dev_get_by_index(net, ifindex);
	if (!netdev)
		return ERR_PTR(-ENODEV);

	priv = netdev_priv(netdev);
	mdev = priv->mdev;
	dev_put(netdev);

	/* Mirred tc action holds a refcount on the ifindex net_device (see
	 * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
	 * after dev_put(netdev), while we're in the context of adding a tc flow.
	 *
	 * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
	 * stored in a hairpin object, which exists until all flows, that refer to it, get
	 * removed.
	 *
	 * On the other hand, after a hairpin object has been created, the peer net_device may
	 * be removed/unbound while there are still some hairpin flows that are using it. This
	 * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
	 * NETDEV_UNREGISTER event of the peer net_device.
	 */
	return mdev;
}
static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
	u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {0};
	void *tirc;
	int err;

	err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
	if (err)
		goto alloc_tdn_err;

	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
	MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
	MLX5_SET(tirc, tirc, transport_domain, hp->tdn);

	err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn);
	if (err)
		goto create_tir_err;

	return 0;

create_tir_err:
	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
alloc_tdn_err:
	return err;
}
static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
	mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}
static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
{
	u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
	struct mlx5e_priv *priv = hp->func_priv;
	int i, ix, sz = MLX5E_INDIR_RQT_SIZE;

	mlx5e_build_default_indir_rqt(indirection_rqt, sz,
				      hp->num_channels);

	for (i = 0; i < sz; i++) {
		ix = i;
		if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
			ix = mlx5e_bits_invert(i, ilog2(sz));
		ix = indirection_rqt[ix];
		rqn = hp->pair->rqn[ix];
		MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
	}
}
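
/* Worked example (illustrative): with XOR hashing the table index is
 * bit-inverted before the indirection lookup, e.g. for sz = 256
 * (ilog2(sz) = 8), i = 1 maps to ix = 0x80, so the entry programmed is
 * rqn = hp->pair->rqn[indirection_rqt[0x80]]. With a Toeplitz hash the
 * indirection table is walked linearly (ix = i).
 */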
static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
	int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
	struct mlx5e_priv *priv = hp->func_priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	void *rqtc;
	u32 *in;

	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);

	mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);

	err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
	if (!err)
		hp->indir_rqt.enabled = true;

	kvfree(in);
	return err;
}
static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	u32 in[MLX5_ST_SZ_DW(create_tir_in)];
	int tt, i, err;
	void *tirc;

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
		struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);

		memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
		tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

		MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
		MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
		MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
		mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);

		err = mlx5_core_create_tir(hp->func_mdev, in,
					   MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
		if (err) {
			mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
			goto err_destroy_tirs;
		}
	}
	return 0;

err_destroy_tirs:
	for (i = 0; i < tt; i++)
		mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
	return err;
}

static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
{
	int tt;

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
		mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
}
static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
					 struct ttc_params *ttc_params)
{
	struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
	int tt;

	memset(ttc_params, 0, sizeof(*ttc_params));

	ttc_params->any_tt_tirn = hp->tirn;

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
		ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];

	ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
	ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
	ft_attr->prio = MLX5E_TC_PRIO;
}
static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	struct ttc_params ttc_params;
	int err;

	err = mlx5e_hairpin_create_indirect_rqt(hp);
	if (err)
		return err;

	err = mlx5e_hairpin_create_indirect_tirs(hp);
	if (err)
		goto err_create_indirect_tirs;

	mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
	err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
	if (err)
		goto err_create_ttc_table;

	netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
		   hp->num_channels, hp->ttc.ft.t->id);

	return 0;

err_create_ttc_table:
	mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
	mlx5e_destroy_rqt(priv, &hp->indir_rqt);

	return err;
}
static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;

	mlx5e_destroy_ttc_table(priv, &hp->ttc);
	mlx5e_hairpin_destroy_indirect_tirs(hp);
	mlx5e_destroy_rqt(priv, &hp->indir_rqt);
}
static struct mlx5e_hairpin *
mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
		     int peer_ifindex)
{
	struct mlx5_core_dev *func_mdev, *peer_mdev;
	struct mlx5e_hairpin *hp;
	struct mlx5_hairpin *pair;
	int err;

	hp = kzalloc(sizeof(*hp), GFP_KERNEL);
	if (!hp)
		return ERR_PTR(-ENOMEM);

	func_mdev = priv->mdev;
	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
	if (IS_ERR(peer_mdev)) {
		err = PTR_ERR(peer_mdev);
		goto create_pair_err;
	}

	pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
	if (IS_ERR(pair)) {
		err = PTR_ERR(pair);
		goto create_pair_err;
	}
	hp->pair = pair;
	hp->func_mdev = func_mdev;
	hp->func_priv = priv;
	hp->num_channels = params->num_channels;

	err = mlx5e_hairpin_create_transport(hp);
	if (err)
		goto create_transport_err;

	if (hp->num_channels > 1) {
		err = mlx5e_hairpin_rss_init(hp);
		if (err)
			goto rss_init_err;
	}

	return hp;

rss_init_err:
	mlx5e_hairpin_destroy_transport(hp);
create_transport_err:
	mlx5_core_hairpin_destroy(hp->pair);
create_pair_err:
	kfree(hp);
	return ERR_PTR(err);
}
static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
{
	if (hp->num_channels > 1)
		mlx5e_hairpin_rss_cleanup(hp);
	mlx5e_hairpin_destroy_transport(hp);
	mlx5_core_hairpin_destroy(hp->pair);
	kfree(hp);
}
static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
{
	return (peer_vhca_id << 16 | prio);
}
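
/* Worked example (illustrative): peer_vhca_id = 5, prio = 3 hashes to
 * (5 << 16) | 3 = 0x50003, so one hairpin entry exists per
 * (peer device, PCP priority) pair.
 */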
static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
						     u16 peer_vhca_id, u8 prio)
{
	struct mlx5e_hairpin_entry *hpe;
	u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);

	hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
			       hairpin_hlist, hash_key) {
		if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
			refcount_inc(&hpe->refcnt);
			return hpe;
		}
	}

	return NULL;
}
static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
			      struct mlx5e_hairpin_entry *hpe)
{
	/* no more hairpin flows for us, release the hairpin pair */
	if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
		return;
	hash_del(&hpe->hairpin_hlist);
	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

	if (!IS_ERR_OR_NULL(hpe->hp)) {
		netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
			   dev_name(hpe->hp->pair->peer_mdev->device));

		mlx5e_hairpin_destroy(hpe->hp);
	}

	WARN_ON(!list_empty(&hpe->flows));
	kfree(hpe);
}
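
/* Release pattern sketch: refcount_dec_and_mutex_lock() only takes
 * hairpin_tbl_lock when the count actually drops to zero, so the common
 * put is lock-free and the final put unhashes the entry before tearing it
 * down, e.g. (names as used above)
 *
 *	hpe = mlx5e_hairpin_get(priv, peer_id, prio);	// refcount + 1
 *	...
 *	mlx5e_hairpin_put(priv, hpe);			// may free hpe
 *
 * The same pattern is used for mod_hdr and encap entries in this file.
 */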
#define UNKNOWN_MATCH_PRIO 8

static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
				  struct mlx5_flow_spec *spec, u8 *match_prio,
				  struct netlink_ext_ack *extack)
{
	void *headers_c, *headers_v;
	u8 prio_val, prio_mask = 0;
	bool vlan_present;

#ifdef CONFIG_MLX5_CORE_EN_DCB
	if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
		NL_SET_ERR_MSG_MOD(extack,
				   "only PCP trust state supported for hairpin");
		return -EOPNOTSUPP;
	}
#endif
	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);

	vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
	if (vlan_present) {
		prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
		prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
	}

	if (!vlan_present || !prio_mask) {
		prio_val = UNKNOWN_MATCH_PRIO;
	} else if (prio_mask != 0x7) {
		NL_SET_ERR_MSG_MOD(extack,
				   "masked priority match not supported for hairpin");
		return -EOPNOTSUPP;
	}

	*match_prio = prio_val;
	return 0;
}
static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow,
				  struct mlx5e_tc_flow_parse_attr *parse_attr,
				  struct netlink_ext_ack *extack)
{
	int peer_ifindex = parse_attr->mirred_ifindex[0];
	struct mlx5_hairpin_params params;
	struct mlx5_core_dev *peer_mdev;
	struct mlx5e_hairpin_entry *hpe;
	struct mlx5e_hairpin *hp;
	u64 link_speed64;
	u32 link_speed;
	u8 match_prio;
	u16 peer_id;
	int err;

	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
	if (IS_ERR(peer_mdev)) {
		NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
		return PTR_ERR(peer_mdev);
	}

	if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
		NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
		return -EOPNOTSUPP;
	}

	peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
	err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
				     extack);
	if (err)
		return err;

	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
	hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
	if (hpe) {
		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
		wait_for_completion(&hpe->res_ready);

		if (IS_ERR(hpe->hp)) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
	if (!hpe) {
		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
		return -ENOMEM;
	}

	spin_lock_init(&hpe->flows_lock);
	INIT_LIST_HEAD(&hpe->flows);
	INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
	hpe->peer_vhca_id = peer_id;
	hpe->prio = match_prio;
	refcount_set(&hpe->refcnt, 1);
	init_completion(&hpe->res_ready);

	hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
		 hash_hairpin_info(peer_id, match_prio));
	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

	params.log_data_size = 15;
	params.log_data_size = min_t(u8, params.log_data_size,
				     MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
	params.log_data_size = max_t(u8, params.log_data_size,
				     MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));

	params.log_num_packets = params.log_data_size -
				 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
	params.log_num_packets = min_t(u8, params.log_num_packets,
				       MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));

	params.q_counter = priv->q_counter;
	/* set hairpin pair per each 50Gbs share of the link */
	mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
	link_speed = max_t(u32, link_speed, 50000);
	link_speed64 = link_speed;
	do_div(link_speed64, 50000);
	params.num_channels = link_speed64;
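	/* Worked example (illustrative): on a 100Gbs link,
	 * link_speed64 = 100000 / 50000 = 2, so two hairpin channels are
	 * created; any link at or below 50Gbs gets a single channel because
	 * of the max_t() clamp above.
	 */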
	hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
	hpe->hp = hp;
	complete_all(&hpe->res_ready);
	if (IS_ERR(hp)) {
		err = PTR_ERR(hp);
		goto out_err;
	}

	netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
		   hp->tirn, hp->pair->rqn[0],
		   dev_name(hp->pair->peer_mdev->device),
		   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);

attach_flow:
	if (hpe->hp->num_channels > 1) {
		flow_flag_set(flow, HAIRPIN_RSS);
		flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
	} else {
		flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
	}

	flow->hpe = hpe;
	spin_lock(&hpe->flows_lock);
	list_add(&flow->hairpin, &hpe->flows);
	spin_unlock(&hpe->flows_lock);

	return 0;

out_err:
	mlx5e_hairpin_put(priv, hpe);
	return err;
}
static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
				   struct mlx5e_tc_flow *flow)
{
	/* flow wasn't fully initialized */
	if (!flow->hpe)
		return;

	spin_lock(&flow->hpe->flows_lock);
	list_del(&flow->hairpin);
	spin_unlock(&flow->hpe->flows_lock);

	mlx5e_hairpin_put(priv, flow->hpe);
	flow->hpe = NULL;
}
static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow_parse_attr *parse_attr,
		      struct mlx5e_tc_flow *flow,
		      struct netlink_ext_ack *extack)
{
	struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
	struct mlx5_nic_flow_attr *attr = flow->nic_attr;
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_flow_destination dest[2] = {};
	struct mlx5_flow_act flow_act = {
		.action = attr->action,
		.flags  = FLOW_ACT_NO_APPEND,
	};
	struct mlx5_fc *counter = NULL;
	int err, dest_ix = 0;

	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
	flow_context->flow_tag = attr->flow_tag;

	if (flow_flag_test(flow, HAIRPIN)) {
		err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
		if (err)
			return err;

		if (flow_flag_test(flow, HAIRPIN_RSS)) {
			dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
			dest[dest_ix].ft = attr->hairpin_ft;
		} else {
			dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
			dest[dest_ix].tir_num = attr->hairpin_tirn;
		}
		dest_ix++;
	} else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest[dest_ix].ft = priv->fs.vlan.ft.t;
		dest_ix++;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		counter = mlx5_fc_create(dev, true);
		if (IS_ERR(counter))
			return PTR_ERR(counter);

		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
		dest[dest_ix].counter_id = mlx5_fc_id(counter);
		dest_ix++;
		attr->counter = counter;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
		flow_act.modify_hdr = attr->modify_hdr;
		kfree(parse_attr->mod_hdr_actions);
		if (err)
			return err;
	}

	mutex_lock(&priv->fs.tc.t_lock);
	if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
		int tc_grp_size, tc_tbl_size;
		u32 max_flow_counter;

		max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
				    MLX5_CAP_GEN(dev, max_flow_counter_15_0);

		tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);

		tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
				    BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));

		priv->fs.tc.t =
			mlx5_create_auto_grouped_flow_table(priv->fs.ns,
							    MLX5E_TC_PRIO,
							    tc_tbl_size,
							    MLX5E_TC_TABLE_NUM_GROUPS,
							    MLX5E_TC_FT_LEVEL, 0);
		if (IS_ERR(priv->fs.tc.t)) {
			mutex_unlock(&priv->fs.tc.t_lock);
			NL_SET_ERR_MSG_MOD(extack,
					   "Failed to create tc offload table");
			netdev_err(priv->netdev,
				   "Failed to create tc offload table\n");
			return PTR_ERR(priv->fs.tc.t);
		}
	}

	if (attr->match_level != MLX5_MATCH_NONE)
		parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

	flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
					    &flow_act, dest, dest_ix);
	mutex_unlock(&priv->fs.tc.t_lock);

	return PTR_ERR_OR_ZERO(flow->rule[0]);
}
static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow)
{
	struct mlx5_nic_flow_attr *attr = flow->nic_attr;
	struct mlx5_fc *counter = NULL;

	counter = attr->counter;
	if (!IS_ERR_OR_NULL(flow->rule[0]))
		mlx5_del_flow_rules(flow->rule[0]);
	mlx5_fc_destroy(priv->mdev, counter);

	mutex_lock(&priv->fs.tc.t_lock);
	if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) {
		mlx5_destroy_flow_table(priv->fs.tc.t);
		priv->fs.tc.t = NULL;
	}
	mutex_unlock(&priv->fs.tc.t_lock);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		mlx5e_detach_mod_hdr(priv, flow);

	if (flow_flag_test(flow, HAIRPIN))
		mlx5e_hairpin_flow_del(priv, flow);
}
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
			       struct mlx5e_tc_flow *flow, int out_index);

static int mlx5e_attach_encap(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow,
			      struct net_device *mirred_dev,
			      int out_index,
			      struct netlink_ext_ack *extack,
			      struct net_device **encap_dev,
			      bool *encap_valid);
static struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
			   struct mlx5e_tc_flow *flow,
			   struct mlx5_flow_spec *spec,
			   struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_flow_handle *rule;

	rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
	if (IS_ERR(rule))
		return rule;

	if (attr->split_count) {
		flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
		if (IS_ERR(flow->rule[1])) {
			mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
			return flow->rule[1];
		}
	}

	return rule;
}
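
/* Note (illustrative): rule[0] is the primary offloaded rule; when
 * attr->split_count is nonzero an extra forward rule is installed in
 * rule[1] to split the action chain across tables. MLX5E_TC_MAX_SPLITS
 * bounds the number of such extra rules, hence the
 * rule[MLX5E_TC_MAX_SPLITS + 1] array in struct mlx5e_tc_flow.
 */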
static void
mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
			     struct mlx5e_tc_flow *flow,
			     struct mlx5_esw_flow_attr *attr)
{
	flow_flag_clear(flow, OFFLOADED);

	if (attr->split_count)
		mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);

	mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
}
static struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
			      struct mlx5e_tc_flow *flow,
			      struct mlx5_flow_spec *spec,
			      struct mlx5_esw_flow_attr *slow_attr)
{
	struct mlx5_flow_handle *rule;

	memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	slow_attr->split_count = 0;
	slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;

	rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
	if (!IS_ERR(rule))
		flow_flag_set(flow, SLOW);

	return rule;
}
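
/* Slow path sketch: the attr copy keeps the original match spec but replaces
 * all actions with a single FWD_DEST into FDB_SLOW_PATH_CHAIN, so packets of
 * a flow with, e.g., an unresolved encap neighbour fall through to software
 * handling instead of being dropped while the neighbour is resolved.
 */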
static void
mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
				  struct mlx5e_tc_flow *flow,
				  struct mlx5_esw_flow_attr *slow_attr)
{
	memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	slow_attr->split_count = 0;
	slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
	mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
	flow_flag_clear(flow, SLOW);
}
/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_add(struct mlx5e_tc_flow *flow,
			     struct list_head *unready_flows)
{
	flow_flag_set(flow, NOT_READY);
	list_add_tail(&flow->unready, unready_flows);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_del(struct mlx5e_tc_flow *flow)
{
	list_del(&flow->unready);
	flow_flag_clear(flow, NOT_READY);
}
static void add_unready_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &rpriv->uplink_priv;

	mutex_lock(&uplink_priv->unready_flows_lock);
	unready_flow_add(flow, &uplink_priv->unready_flows);
	mutex_unlock(&uplink_priv->unready_flows_lock);
}

static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &rpriv->uplink_priv;

	mutex_lock(&uplink_priv->unready_flows_lock);
	unready_flow_del(flow);
	mutex_unlock(&uplink_priv->unready_flows_lock);
}
static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow *flow,
		      struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	u32 max_chain = mlx5_eswitch_get_chain_range(esw);
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
	struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
	u16 max_prio = mlx5_eswitch_get_prio_range(esw);
	struct net_device *out_dev, *encap_dev = NULL;
	struct mlx5_fc *counter = NULL;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *out_priv;
	bool encap_valid = true;
	int err = 0;
	int out_index;

	if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) {
		NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW");
		return -EOPNOTSUPP;
	}

	if (attr->chain > max_chain) {
		NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
		return -EOPNOTSUPP;
	}

	if (attr->prio > max_prio) {
		NL_SET_ERR_MSG(extack, "Requested priority is out of supported range");
		return -EOPNOTSUPP;
	}

	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
		int mirred_ifindex;

		if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
			continue;

		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
		out_dev = __dev_get_by_index(dev_net(priv->netdev),
					     mirred_ifindex);
		err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
					 extack, &encap_dev, &encap_valid);
		if (err)
			return err;

		out_priv = netdev_priv(encap_dev);
		rpriv = out_priv->ppriv;
		attr->dests[out_index].rep = rpriv->rep;
		attr->dests[out_index].mdev = out_priv->mdev;
	}

	err = mlx5_eswitch_add_vlan_action(esw, attr);
	if (err)
		return err;

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
		kfree(parse_attr->mod_hdr_actions);
		if (err)
			return err;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		counter = mlx5_fc_create(attr->counter_dev, true);
		if (IS_ERR(counter))
			return PTR_ERR(counter);

		attr->counter = counter;
	}

	/* we get here if one of the following takes place:
	 * (1) there's no error
	 * (2) there's an encap action and we don't have valid neigh
	 */
	if (!encap_valid) {
		/* continue with goto slow path rule instead */
		struct mlx5_esw_flow_attr slow_attr;

		flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr);
	} else {
		flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
	}

	if (IS_ERR(flow->rule[0]))
		return PTR_ERR(flow->rule[0]);

	flow_flag_set(flow, OFFLOADED);

	return 0;
}
static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
{
	struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec;
	void *headers_v = MLX5_ADDR_OF(fte_match_param,
				       spec->match_value,
				       misc_parameters_3);
	u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
					     headers_v,
					     geneve_tlv_option_0_data);

	return !!geneve_tlv_opt_0_data;
}
static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
	struct mlx5_esw_flow_attr slow_attr;
	int out_index;

	if (flow_flag_test(flow, NOT_READY)) {
		remove_unready_flow(flow);
		kvfree(attr->parse_attr);
		return;
	}

	if (mlx5e_is_offloaded_flow(flow)) {
		if (flow_flag_test(flow, SLOW))
			mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
		else
			mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
	}

	if (mlx5_flow_has_geneve_opt(flow))
		mlx5_geneve_tlv_option_del(priv->mdev->geneve);

	mlx5_eswitch_del_vlan_action(esw, attr);

	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
		if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
			mlx5e_detach_encap(priv, flow, out_index);
			kfree(attr->parse_attr->tun_info[out_index]);
		}
	kvfree(attr->parse_attr);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		mlx5e_detach_mod_hdr(priv, flow);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
		mlx5_fc_destroy(attr->counter_dev, attr->counter);
}
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr slow_attr, *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     e->reformat_type,
						     e->encap_size, e->encap_header,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
			       PTR_ERR(e->pkt_reformat));
		return;
	}
	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, flow_list, tmp_list) {
		bool all_flow_encaps_valid = true;
		int i;

		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		esw_attr = flow->esw_attr;
		spec = &esw_attr->parse_attr->spec;

		esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
		esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
		/* Flow can be associated with multiple encap entries.
		 * Before offloading the flow verify that all of them have
		 * a valid neighbour.
		 */
		for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
			if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
				continue;
			if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
				all_flow_encaps_valid = false;
				break;
			}
		}
		/* Do not offload flows with unresolved neighbors */
		if (!all_flow_encaps_valid)
			continue;
		/* update from slow path rule to encap rule */
		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
		flow->rule[0] = rule;
		/* was unset when slow path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}
}
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr slow_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		spec = &flow->esw_attr->parse_attr->spec;

		/* update from encap rule to slow path rule */
		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr);
		/* mark the flow's encap dest as non-valid */
		flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;

		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
		flow->rule[0] = rule;
		/* was unset when fast path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}

	/* we know that the encap is valid */
	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}
static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
{
	if (mlx5e_is_eswitch_flow(flow))
		return flow->esw_attr->counter;
	else
		return flow->nic_attr->counter;
}
/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
	struct encap_flow_item *efi;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(efi, &e->flows, list) {
		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
		if (IS_ERR(mlx5e_flow_get(flow)))
			continue;
		wait_for_completion(&flow->init_done);

		flow->tmp_efi_index = efi->index;
		list_add(&flow->tmp_list, flow_list);
	}
}
/* Iterate over tmp_list of flows attached to flow_list head. */
void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
{
	struct mlx5e_tc_flow *flow, *tmp;

	list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
		mlx5e_flow_put(priv, flow);
}
static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
			   struct mlx5e_encap_entry *e)
{
	struct mlx5e_encap_entry *next = NULL;

retry:
	rcu_read_lock();

	/* find encap with non-zero reference counter value */
	for (next = e ?
		     list_next_or_null_rcu(&nhe->encap_list,
					   &e->encap_list,
					   struct mlx5e_encap_entry,
					   encap_list) :
		     list_first_or_null_rcu(&nhe->encap_list,
					    struct mlx5e_encap_entry,
					    encap_list);
	     next;
	     next = list_next_or_null_rcu(&nhe->encap_list,
					  &next->encap_list,
					  struct mlx5e_encap_entry,
					  encap_list))
		if (mlx5e_encap_take(next))
			break;

	rcu_read_unlock();

	/* release starting encap */
	if (e)
		mlx5e_encap_put(netdev_priv(e->out_dev), e);
	if (!next)
		return next;

	/* wait for encap to be fully initialized */
	wait_for_completion(&next->res_ready);
	/* continue searching if encap entry is not in valid state after completion */
	if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
		e = next;
		goto retry;
	}

	return next;
}
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	struct mlx5e_encap_entry *e = NULL;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;
	u64 lastuse;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	/* mlx5e_get_next_valid_encap() releases previous encap before returning
	 * next one.
	 */
	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
		struct encap_flow_item *efi, *tmp;
		struct mlx5_eswitch *esw;
		LIST_HEAD(flow_list);

		esw = priv->mdev->priv.eswitch;
		mutex_lock(&esw->offloads.encap_tbl_lock);
		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
			flow = container_of(efi, struct mlx5e_tc_flow,
					    encaps[efi->index]);
			if (IS_ERR(mlx5e_flow_get(flow)))
				continue;
			list_add(&flow->tmp_list, &flow_list);

			if (mlx5e_is_offloaded_flow(flow)) {
				counter = mlx5e_tc_get_counter(flow);
				lastuse = mlx5_fc_query_lastuse(counter);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
		mutex_unlock(&esw->offloads.encap_tbl_lock);

		mlx5e_put_encap_flow_list(priv, &flow_list);
		if (neigh_used) {
			/* release current encap before breaking the loop */
			mlx5e_encap_put(priv, e);
			break;
		}
	}

	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
		if (!n)
			return;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}
static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	WARN_ON(!list_empty(&e->flows));

	if (e->compl_result > 0) {
		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	}

	kfree(e->encap_header);
	kfree(e);
}
void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
		return;
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
			       struct mlx5e_tc_flow *flow, int out_index)
{
	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	/* flow wasn't fully initialized */
	if (!e)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->encaps[out_index].list);
	flow->encaps[out_index].e = NULL;
	if (!refcount_dec_and_test(&e->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}
static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;

	if (!flow_flag_test(flow, ESWITCH) ||
	    !flow_flag_test(flow, DUP))
		return;

	mutex_lock(&esw->offloads.peer_mutex);
	list_del(&flow->peer);
	mutex_unlock(&esw->offloads.peer_mutex);

	flow_flag_clear(flow, DUP);

	if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
		mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
		kfree(flow->peer_flow);
	}

	flow->peer_flow = NULL;
}
static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_core_dev *dev = flow->priv->mdev;
	struct mlx5_devcom *devcom = dev->priv.devcom;
	struct mlx5_eswitch *peer_esw;

	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		return;

	__mlx5e_tc_del_fdb_peer_flow(flow);
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
}
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	if (mlx5e_is_eswitch_flow(flow)) {
		mlx5e_tc_del_fdb_peer_flow(flow);
		mlx5e_tc_del_fdb_flow(priv, flow);
	} else {
		mlx5e_tc_del_nic_flow(priv, flow);
	}
}
static int parse_tunnel_attr(struct mlx5e_priv *priv,
			     struct mlx5_flow_spec *spec,
			     struct flow_cls_offload *f,
			     struct net_device *filter_dev, u8 *match_level)
{
	struct netlink_ext_ack *extack = f->common.extack;
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	int err;

	err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
				 headers_c, headers_v, match_level);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "failed to parse tunnel attributes");
		return err;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_match_control match;
		u16 addr_type;

		flow_rule_match_enc_control(rule, &match);
		addr_type = match.key->addr_type;

		/* For tunnel addr_type used same key id`s as for non-tunnel */
		if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
			struct flow_match_ipv4_addrs match;

			flow_rule_match_enc_ipv4_addrs(rule, &match);
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 src_ipv4_src_ipv6.ipv4_layout.ipv4,
				 ntohl(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 src_ipv4_src_ipv6.ipv4_layout.ipv4,
				 ntohl(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
				 ntohl(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
				 ntohl(match.key->dst));

			MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
					 ethertype);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
				 ETH_P_IP);
		} else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
			struct flow_match_ipv6_addrs match;

			flow_rule_match_enc_ipv6_addrs(rule, &match);
			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					    src_ipv4_src_ipv6.ipv6_layout.ipv6),
			       &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
								   ipv6));
			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					    src_ipv4_src_ipv6.ipv6_layout.ipv6),
			       &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
								  ipv6));

			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
			       &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
								   ipv6));
			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
			       &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
								  ipv6));

			MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
					 ethertype);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
				 ETH_P_IPV6);
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
		struct flow_match_ip match;

		flow_rule_match_enc_ip(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
			 match.mask->tos & 0x3);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
			 match.key->tos & 0x3);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
			 match.mask->tos >> 2);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
			 match.key->tos >> 2);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
			 match.mask->ttl);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
			 match.key->ttl);

		if (match.mask->ttl &&
		    !MLX5_CAP_ESW_FLOWTABLE_FDB
			(priv->mdev,
			 ft_field_support.outer_ipv4_ttl)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Matching on TTL is not supported");
			return -EOPNOTSUPP;
		}
	}

	/* Enforce DMAC when offloading incoming tunneled flows.
	 * Flow counters require a match on the DMAC.
	 */
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
	ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				     dmac_47_16), priv->netdev->dev_addr);

	/* let software handle IP fragments */
	MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);

	return 0;
}
static void *get_match_headers_criteria(u32 flags,
					struct mlx5_flow_spec *spec)
{
	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
		MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
			     inner_headers) :
		MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
			     outer_headers);
}

static void *get_match_headers_value(u32 flags,
				     struct mlx5_flow_spec *spec)
{
	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
		MLX5_ADDR_OF(fte_match_param, spec->match_value,
			     inner_headers) :
		MLX5_ADDR_OF(fte_match_param, spec->match_value,
			     outer_headers);
}
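
/* Usage sketch: for decap flows (MLX5_FLOW_CONTEXT_ACTION_DECAP) these
 * helpers return the inner_headers of the match param, otherwise the
 * outer_headers, e.g. in __parse_cls_flower() below:
 *
 *	headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP,
 *					       spec);
 *
 * selects the inner header criteria once tunnel matching has been parsed.
 */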
1827 static int __parse_cls_flower(struct mlx5e_priv
*priv
,
1828 struct mlx5_flow_spec
*spec
,
1829 struct flow_cls_offload
*f
,
1830 struct net_device
*filter_dev
,
1831 u8
*inner_match_level
, u8
*outer_match_level
)
1833 struct netlink_ext_ack
*extack
= f
->common
.extack
;
1834 void *headers_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
1836 void *headers_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
1838 void *misc_c
= MLX5_ADDR_OF(fte_match_param
, spec
->match_criteria
,
1840 void *misc_v
= MLX5_ADDR_OF(fte_match_param
, spec
->match_value
,
1842 struct flow_rule
*rule
= flow_cls_offload_flow_rule(f
);
1843 struct flow_dissector
*dissector
= rule
->match
.dissector
;
1848 match_level
= outer_match_level
;
1850 if (dissector
->used_keys
&
1851 ~(BIT(FLOW_DISSECTOR_KEY_META
) |
1852 BIT(FLOW_DISSECTOR_KEY_CONTROL
) |
1853 BIT(FLOW_DISSECTOR_KEY_BASIC
) |
1854 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS
) |
1855 BIT(FLOW_DISSECTOR_KEY_VLAN
) |
1856 BIT(FLOW_DISSECTOR_KEY_CVLAN
) |
1857 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS
) |
1858 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS
) |
1859 BIT(FLOW_DISSECTOR_KEY_PORTS
) |
1860 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID
) |
1861 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS
) |
1862 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS
) |
1863 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS
) |
1864 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL
) |
1865 BIT(FLOW_DISSECTOR_KEY_TCP
) |
1866 BIT(FLOW_DISSECTOR_KEY_IP
) |
1867 BIT(FLOW_DISSECTOR_KEY_ENC_IP
) |
1868 BIT(FLOW_DISSECTOR_KEY_ENC_OPTS
))) {
1869 NL_SET_ERR_MSG_MOD(extack
, "Unsupported key");
1870 netdev_dbg(priv
->netdev
, "Unsupported key used: 0x%x\n",
1871 dissector
->used_keys
);
1875 if (mlx5e_get_tc_tun(filter_dev
)) {
1876 if (parse_tunnel_attr(priv
, spec
, f
, filter_dev
,
1880 /* At this point, header pointers should point to the inner
1881 * headers, outer header were already set by parse_tunnel_attr
1883 match_level
= inner_match_level
;
1884 headers_c
= get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP
,
1886 headers_v
= get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP
,
1890 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_BASIC
)) {
1891 struct flow_match_basic match
;
1893 flow_rule_match_basic(rule
, &match
);
1894 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, ethertype
,
1895 ntohs(match
.mask
->n_proto
));
1896 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, ethertype
,
1897 ntohs(match
.key
->n_proto
));
1899 if (match
.mask
->n_proto
)
1900 *match_level
= MLX5_MATCH_L2
;
1902 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_VLAN
) ||
1903 is_vlan_dev(filter_dev
)) {
1904 struct flow_dissector_key_vlan filter_dev_mask
;
1905 struct flow_dissector_key_vlan filter_dev_key
;
1906 struct flow_match_vlan match
;
1908 if (is_vlan_dev(filter_dev
)) {
1909 match
.key
= &filter_dev_key
;
1910 match
.key
->vlan_id
= vlan_dev_vlan_id(filter_dev
);
1911 match
.key
->vlan_tpid
= vlan_dev_vlan_proto(filter_dev
);
1912 match
.key
->vlan_priority
= 0;
1913 match
.mask
= &filter_dev_mask
;
1914 memset(match
.mask
, 0xff, sizeof(*match
.mask
));
1915 match
.mask
->vlan_priority
= 0;
1917 flow_rule_match_vlan(rule
, &match
);
1919 if (match
.mask
->vlan_id
||
1920 match
.mask
->vlan_priority
||
1921 match
.mask
->vlan_tpid
) {
1922 if (match
.key
->vlan_tpid
== htons(ETH_P_8021AD
)) {
1923 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
1925 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
1928 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
,
1930 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
,
1934 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, first_vid
,
1935 match
.mask
->vlan_id
);
1936 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, first_vid
,
1937 match
.key
->vlan_id
);
1939 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, first_prio
,
1940 match
.mask
->vlan_priority
);
1941 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, first_prio
,
1942 match
.key
->vlan_priority
);
1944 *match_level
= MLX5_MATCH_L2
;
1946 } else if (*match_level
!= MLX5_MATCH_NONE
) {
1947 /* cvlan_tag enabled in match criteria and
1948 * disabled in match value means both S & C tags
1949 * don't exist (untagged of both)
1951 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, cvlan_tag
, 1);
1952 *match_level
= MLX5_MATCH_L2
;
1955 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_CVLAN
)) {
1956 struct flow_match_vlan match
;
1958 flow_rule_match_cvlan(rule
, &match
);
1959 if (match
.mask
->vlan_id
||
1960 match
.mask
->vlan_priority
||
1961 match
.mask
->vlan_tpid
) {
1962 if (match
.key
->vlan_tpid
== htons(ETH_P_8021AD
)) {
1963 MLX5_SET(fte_match_set_misc
, misc_c
,
1964 outer_second_svlan_tag
, 1);
1965 MLX5_SET(fte_match_set_misc
, misc_v
,
1966 outer_second_svlan_tag
, 1);
1968 MLX5_SET(fte_match_set_misc
, misc_c
,
1969 outer_second_cvlan_tag
, 1);
1970 MLX5_SET(fte_match_set_misc
, misc_v
,
1971 outer_second_cvlan_tag
, 1);
1974 MLX5_SET(fte_match_set_misc
, misc_c
, outer_second_vid
,
1975 match
.mask
->vlan_id
);
1976 MLX5_SET(fte_match_set_misc
, misc_v
, outer_second_vid
,
1977 match
.key
->vlan_id
);
1978 MLX5_SET(fte_match_set_misc
, misc_c
, outer_second_prio
,
1979 match
.mask
->vlan_priority
);
1980 MLX5_SET(fte_match_set_misc
, misc_v
, outer_second_prio
,
1981 match
.key
->vlan_priority
);
1983 *match_level
= MLX5_MATCH_L2
;
1987 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_ETH_ADDRS
)) {
1988 struct flow_match_eth_addrs match
;
1990 flow_rule_match_eth_addrs(rule
, &match
);
1991 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
1994 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
1998 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_c
,
2001 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4
, headers_v
,
2005 if (!is_zero_ether_addr(match
.mask
->src
) ||
2006 !is_zero_ether_addr(match
.mask
->dst
))
2007 *match_level
= MLX5_MATCH_L2
;
2010 if (flow_rule_match_key(rule
, FLOW_DISSECTOR_KEY_CONTROL
)) {
2011 struct flow_match_control match
;
2013 flow_rule_match_control(rule
, &match
);
2014 addr_type
= match
.key
->addr_type
;
2016 /* the HW doesn't support frag first/later */
2017 if (match
.mask
->flags
& FLOW_DIS_FIRST_FRAG
)
2020 if (match
.mask
->flags
& FLOW_DIS_IS_FRAGMENT
) {
2021 MLX5_SET(fte_match_set_lyr_2_4
, headers_c
, frag
, 1);
2022 MLX5_SET(fte_match_set_lyr_2_4
, headers_v
, frag
,
2023 match
.key
->flags
& FLOW_DIS_IS_FRAGMENT
);
2025 /* the HW doesn't need L3 inline to match on frag=no */
2026 if (!(match
.key
->flags
& FLOW_DIS_IS_FRAGMENT
))
2027 *match_level
= MLX5_MATCH_L2
;
2028 /* *** L2 attributes parsing up to here *** */
2030 *match_level
= MLX5_MATCH_L3
;
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_match_basic match;

		flow_rule_match_basic(rule, &match);
		ip_proto = match.key->ip_proto;

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
			 match.mask->ip_proto);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
			 match.key->ip_proto);

		if (match.mask->ip_proto)
			*match_level = MLX5_MATCH_L3;
	}
	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.key->src, sizeof(match.key->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.key->dst, sizeof(match.key->dst));

		if (match.mask->src || match.mask->dst)
			*match_level = MLX5_MATCH_L3;
	}
	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.key->src, sizeof(match.key->src));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.key->dst, sizeof(match.key->dst));

		if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
		    ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
			*match_level = MLX5_MATCH_L3;
	}
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
		struct flow_match_ip match;

		flow_rule_match_ip(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
			 match.mask->tos & 0x3);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
			 match.key->tos & 0x3);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
			 match.mask->tos >> 2);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
			 match.key->tos >> 2);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
			 match.mask->ttl);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
			 match.key->ttl);

		if (match.mask->ttl &&
		    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
						ft_field_support.outer_ipv4_ttl)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Matching on TTL is not supported");
			return -EOPNOTSUPP;
		}

		if (match.mask->tos || match.mask->ttl)
			*match_level = MLX5_MATCH_L3;
	}
	/* ***  L3 attributes parsing up to here *** */

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (ip_proto) {
		case IPPROTO_TCP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_dport, ntohs(match.key->dst));
			break;
		case IPPROTO_UDP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_dport, ntohs(match.key->dst));
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack,
					   "Only UDP and TCP transports are supported for L4 matching");
			netdev_err(priv->netdev,
				   "Only UDP and TCP transport are supported\n");
			return -EINVAL;
		}

		if (match.mask->src || match.mask->dst)
			*match_level = MLX5_MATCH_L4;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
		struct flow_match_tcp match;

		flow_rule_match_tcp(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
			 ntohs(match.mask->flags));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
			 ntohs(match.key->flags));

		if (match.mask->flags)
			*match_level = MLX5_MATCH_L4;
	}

	return 0;
}
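/* parse_cls_flower() wraps __parse_cls_flower() and additionally enforces
 * the eswitch minimum-inline policy: the vport must be configured to copy
 * at least as many packet headers as the match requires (L2 < L3 < L4).
 * For tunnel matches the inner headers dominate, hence
 * non_tunnel_match_level below.
 */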
static int parse_cls_flower(struct mlx5e_priv *priv,
			    struct mlx5e_tc_flow *flow,
			    struct mlx5_flow_spec *spec,
			    struct flow_cls_offload *f,
			    struct net_device *filter_dev)
{
	u8 inner_match_level, outer_match_level, non_tunnel_match_level;
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch_rep *rep;
	bool is_eswitch_flow;
	int err;

	inner_match_level = MLX5_MATCH_NONE;
	outer_match_level = MLX5_MATCH_NONE;

	err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level,
				 &outer_match_level);
	non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
				 outer_match_level : inner_match_level;

	is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
	if (!err && is_eswitch_flow) {
		rep = rpriv->rep;
		if (rep->vport != MLX5_VPORT_UPLINK &&
		    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
		     esw->offloads.inline_mode < non_tunnel_match_level)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Flow is not offloaded due to min inline setting");
			netdev_warn(priv->netdev,
				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
				    non_tunnel_match_level, esw->offloads.inline_mode);
			return -EOPNOTSUPP;
		}
	}

	if (is_eswitch_flow) {
		flow->esw_attr->inner_match_level = inner_match_level;
		flow->esw_attr->outer_match_level = outer_match_level;
	} else {
		flow->nic_attr->match_level = non_tunnel_match_level;
	}

	return err;
}
struct pedit_headers {
	struct ethhdr   eth;
	struct vlan_hdr vlan;
	struct iphdr    ip4;
	struct ipv6hdr  ip6;
	struct tcphdr   tcp;
	struct udphdr   udp;
};

struct pedit_headers_action {
	struct pedit_headers vals;
	struct pedit_headers masks;
	u32 pedits;
};

static int pedit_header_offsets[] = {
	[FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
	[FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
	[FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
	[FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
	[FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
};

#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
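/* set_pedit_val() folds one 32-bit pedit munge (offset/mask/val within one
 * header type) into the masks/vals shadow headers above. Callers pass the
 * inverted TC mask, so the stored mask has 1-bits exactly where the action
 * writes; acting twice on the same bits is rejected, which lets the later
 * offload loop consume each field at most once.
 */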
static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
			 struct pedit_headers_action *hdrs)
{
	u32 *curr_pmask, *curr_pval;

	curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
	curr_pval  = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);

	if (*curr_pmask & mask) /* disallow acting twice on the same location */
		return -EOPNOTSUPP;

	*curr_pmask |= mask;
	*curr_pval  |= (val & mask);

	return 0;
}
struct mlx5_fields {
	u8  field;
	u8  size;
	u32 offset;
	u32 match_offset;
};

#define OFFLOAD(fw_field, size, field, off, match_field) \
		{MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, \
		 offsetof(struct pedit_headers, field) + (off), \
		 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
/* masked values are the same and there are no rewrites that do not have a
 * match.
 */
#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
	type matchmaskx = *(type *)(matchmaskp); \
	type matchvalx = *(type *)(matchvalp); \
	type maskx = *(type *)(maskp); \
	type valx = *(type *)(valp); \
	\
	(valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
								 matchmaskx)); \
})

static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
			 void *matchmaskp, int size)
{
	bool same = false;

	switch (size) {
	case sizeof(u8):
		same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
		break;
	case sizeof(u16):
		same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
		break;
	case sizeof(u32):
		same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
		break;
	}

	return same;
}
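/* Translation table between the SW pedit shadow headers and FW
 * modify-header fields: each OFFLOAD() entry names the FW field id, its
 * size in bytes, its location within struct pedit_headers, and the byte
 * offset of the corresponding match value in fte_match_set_lyr_2_4 (used
 * by cmp_val_mask() to skip rewrites already guaranteed by the match).
 */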
static struct mlx5_fields fields[] = {
	OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0, dmac_47_16),
	OFFLOAD(DMAC_15_0,  2, eth.h_dest[4], 0, dmac_15_0),
	OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0, smac_47_16),
	OFFLOAD(SMAC_15_0,  2, eth.h_source[4], 0, smac_15_0),
	OFFLOAD(ETHERTYPE,  2, eth.h_proto, 0, ethertype),
	OFFLOAD(FIRST_VID,  2, vlan.h_vlan_TCI, 0, first_vid),

	OFFLOAD(IP_TTL, 1, ip4.ttl,   0, ttl_hoplimit),
	OFFLOAD(SIPV4,  4, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
	OFFLOAD(DIPV4,  4, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),

	OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
	OFFLOAD(SIPV6_95_64,  4, ip6.saddr.s6_addr32[1], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
	OFFLOAD(SIPV6_63_32,  4, ip6.saddr.s6_addr32[2], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
	OFFLOAD(SIPV6_31_0,   4, ip6.saddr.s6_addr32[3], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
	OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
	OFFLOAD(DIPV6_95_64,  4, ip6.daddr.s6_addr32[1], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
	OFFLOAD(DIPV6_63_32,  4, ip6.daddr.s6_addr32[2], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
	OFFLOAD(DIPV6_31_0,   4, ip6.daddr.s6_addr32[3], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
	OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0, ttl_hoplimit),

	OFFLOAD(TCP_SPORT, 2, tcp.source,  0, tcp_sport),
	OFFLOAD(TCP_DPORT, 2, tcp.dest,    0, tcp_dport),
	OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5, tcp_flags),

	OFFLOAD(UDP_SPORT, 2, udp.source, 0, udp_sport),
	OFFLOAD(UDP_DPORT, 2, udp.dest,   0, udp_dport),
};
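/* The offload loop below can only rewrite one contiguous bit range per
 * field. E.g. (illustrative) a mask of 0x0ff0 gives first=4, last=11 with
 * the next zero bit (12) past 'last', so it is accepted, while 0x0f0f has
 * a zero bit (4) between first (0) and last (11) and is rejected as a
 * "rewrite of few sub-fields".
 */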
/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at
 * max from the SW pedit action. On success, attr->num_mod_hdr_actions
 * says how many HW actions were actually parsed.
 */
static int offload_pedit_fields(struct pedit_headers_action *hdrs,
				struct mlx5e_tc_flow_parse_attr *parse_attr,
				u32 *action_flags,
				struct netlink_ext_ack *extack)
{
	struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
	void *headers_c = get_match_headers_criteria(*action_flags,
						     &parse_attr->spec);
	void *headers_v = get_match_headers_value(*action_flags,
						  &parse_attr->spec);
	int i, action_size, nactions, max_actions, first, last, next_z;
	void *s_masks_p, *a_masks_p, *vals_p;
	struct mlx5_fields *f;
	u8 cmd, field_bsize;
	u32 s_mask, a_mask;
	unsigned long mask;
	__be32 mask_be32;
	__be16 mask_be16;
	void *action;

	set_masks = &hdrs[0].masks;
	add_masks = &hdrs[1].masks;
	set_vals = &hdrs[0].vals;
	add_vals = &hdrs[1].vals;

	action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
	action = parse_attr->mod_hdr_actions +
		 parse_attr->num_mod_hdr_actions * action_size;

	max_actions = parse_attr->max_mod_hdr_actions;
	nactions = parse_attr->num_mod_hdr_actions;

	for (i = 0; i < ARRAY_SIZE(fields); i++) {
		bool skip;

		f = &fields[i];
		/* avoid seeing bits set from previous iterations */
		s_mask = 0;
		a_mask = 0;

		s_masks_p = (void *)set_masks + f->offset;
		a_masks_p = (void *)add_masks + f->offset;

		memcpy(&s_mask, s_masks_p, f->size);
		memcpy(&a_mask, a_masks_p, f->size);

		if (!s_mask && !a_mask) /* nothing to offload here */
			continue;

		if (s_mask && a_mask) {
			NL_SET_ERR_MSG_MOD(extack,
					   "can't set and add to the same HW field");
			printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
			return -EOPNOTSUPP;
		}

		if (nactions == max_actions) {
			NL_SET_ERR_MSG_MOD(extack,
					   "too many pedit actions, can't offload");
			printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
			return -EOPNOTSUPP;
		}

		skip = false;
		if (s_mask) {
			void *match_mask = headers_c + f->match_offset;
			void *match_val = headers_v + f->match_offset;

			cmd  = MLX5_ACTION_TYPE_SET;
			mask = s_mask;
			vals_p = (void *)set_vals + f->offset;
			/* don't rewrite if we have a match on the same value */
			if (cmp_val_mask(vals_p, s_masks_p, match_val,
					 match_mask, f->size))
				skip = true;
			/* clear to denote we consumed this field */
			memset(s_masks_p, 0, f->size);
		} else {
			u32 zero = 0;

			cmd  = MLX5_ACTION_TYPE_ADD;
			mask = a_mask;
			vals_p = (void *)add_vals + f->offset;
			/* add 0 is no change */
			if (!memcmp(vals_p, &zero, f->size))
				skip = true;
			/* clear to denote we consumed this field */
			memset(a_masks_p, 0, f->size);
		}
		if (skip)
			continue;

		field_bsize = f->size * BITS_PER_BYTE;

		if (field_bsize == 32) {
			mask_be32 = (__be32)mask;
			mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
		} else if (field_bsize == 16) {
			mask_be32 = (__be32)mask;
			mask_be16 = *(__be16 *)&mask_be32;
			mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
		}

		first = find_first_bit(&mask, field_bsize);
		next_z = find_next_zero_bit(&mask, field_bsize, first);
		last  = find_last_bit(&mask, field_bsize);
		if (first < next_z && next_z < last) {
			NL_SET_ERR_MSG_MOD(extack,
					   "rewrite of few sub-fields isn't supported");
			printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
			       mask);
			return -EOPNOTSUPP;
		}

		MLX5_SET(set_action_in, action, action_type, cmd);
		MLX5_SET(set_action_in, action, field, f->field);

		if (cmd == MLX5_ACTION_TYPE_SET) {
			MLX5_SET(set_action_in, action, offset, first);
			/* length is num of bits to be written, zero means length of 32 */
			MLX5_SET(set_action_in, action, length, (last - first + 1));
		}

		if (field_bsize == 32)
			MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
		else if (field_bsize == 16)
			MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
		else if (field_bsize == 8)
			MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);

		action += action_size;
		nactions++;
	}

	parse_attr->num_mod_hdr_actions = nactions;

	return 0;
}
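/* Modify-header capacity is a per-namespace FW capability: FDB (switchdev)
 * rules and NIC RX rules advertise separate max_modify_header_actions.
 */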
static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
						  int namespace)
{
	if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
		return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
	else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
		return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
}
static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
				 struct pedit_headers_action *hdrs,
				 int namespace,
				 struct mlx5e_tc_flow_parse_attr *parse_attr)
{
	int nkeys, action_size, max_actions;

	nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits +
		hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits;
	action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);

	max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace);
	/* can get up to crazily 16 HW actions in 32 bits pedit SW key */
	max_actions = min(max_actions, nkeys * 16);

	parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
	if (!parse_attr->mod_hdr_actions)
		return -ENOMEM;

	parse_attr->max_mod_hdr_actions = max_actions;

	return 0;
}
static const struct pedit_headers zero_masks = {};
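/* Entry point for a single TC pedit/add munge. As an illustration (one
 * typical form, not the only one), a rule such as
 *   tc filter add dev $DEV ingress protocol ip flower ip_proto tcp \
 *       action pedit ex munge ip ttl set 63 pipe action csum ip
 * reaches this parser once per munge, with act->mangle describing one
 * 32-bit word to rewrite.
 */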
static int parse_tc_pedit_action(struct mlx5e_priv *priv,
				 const struct flow_action_entry *act, int namespace,
				 struct mlx5e_tc_flow_parse_attr *parse_attr,
				 struct pedit_headers_action *hdrs,
				 struct netlink_ext_ack *extack)
{
	u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
	int err = -EOPNOTSUPP;
	u32 mask, val, offset;
	u8 htype;

	htype = act->mangle.htype;
	err = -EOPNOTSUPP; /* can't be all optimistic */

	if (htype == FLOW_ACT_MANGLE_UNSPEC) {
		NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
		goto out_err;
	}

	if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "The pedit offload action is not supported");
		goto out_err;
	}

	mask = act->mangle.mask;
	val = act->mangle.val;
	offset = act->mangle.offset;

	err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
	if (err)
		goto out_err;

	hdrs[cmd].pedits++;

	return 0;
out_err:
	return err;
}
static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
				 struct mlx5e_tc_flow_parse_attr *parse_attr,
				 struct pedit_headers_action *hdrs,
				 u32 *action_flags,
				 struct netlink_ext_ack *extack)
{
	struct pedit_headers *cmd_masks;
	int err;
	u8 cmd;

	if (!parse_attr->mod_hdr_actions) {
		err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr);
		if (err)
			goto out_err;
	}

	err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack);
	if (err < 0)
		goto out_dealloc_parsed_actions;

	for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
		cmd_masks = &hdrs[cmd].masks;
		if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
			NL_SET_ERR_MSG_MOD(extack,
					   "attempt to offload an unsupported field");
			netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
			print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
				       16, 1, cmd_masks, sizeof(zero_masks), true);
			err = -EOPNOTSUPP;
			goto out_dealloc_parsed_actions;
		}
	}

	return 0;

out_dealloc_parsed_actions:
	kfree(parse_attr->mod_hdr_actions);
out_err:
	return err;
}
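/* The device recalculates checksums only as a side effect of header
 * rewrite, so a TC csum action is offloadable only together with pedit and
 * only for the IPv4/TCP/UDP checksum flags checked below.
 */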
static bool csum_offload_supported(struct mlx5e_priv *priv,
				   u32 action,
				   u32 update_flags,
				   struct netlink_ext_ack *extack)
{
	u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
			 TCA_CSUM_UPDATE_FLAG_UDP;

	/* The HW recalcs checksums only if re-writing headers */
	if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "TC csum action is only offloaded with pedit");
		netdev_warn(priv->netdev,
			    "TC csum action is only offloaded with pedit\n");
		return false;
	}

	if (update_flags & ~prot_flags) {
		NL_SET_ERR_MSG_MOD(extack,
				   "can't offload TC csum action for some header/s");
		netdev_warn(priv->netdev,
			    "can't offload TC csum action for some header/s - flags %#x\n",
			    update_flags);
		return false;
	}

	return true;
}
struct ip_ttl_word {
	__u8	ttl;
	__u8	protocol;
	__sum16	check;
};

struct ipv6_hoplimit_word {
	__be16	payload_len;
	__u8	nexthdr;
	__u8	hop_limit;
};

static bool is_action_keys_supported(const struct flow_action_entry *act)
{
	u32 mask, offset;
	u8 htype;

	htype = act->mangle.htype;
	offset = act->mangle.offset;
	mask = ~act->mangle.mask;
	/* For IPv4 & IPv6 header check 4 byte word,
	 * to determine that modified fields
	 * are NOT ttl & hop_limit only.
	 */
	if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
		struct ip_ttl_word *ttl_word =
			(struct ip_ttl_word *)&mask;

		if (offset != offsetof(struct iphdr, ttl) ||
		    ttl_word->protocol ||
		    ttl_word->check)
			return true;
	} else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
		struct ipv6_hoplimit_word *hoplimit_word =
			(struct ipv6_hoplimit_word *)&mask;

		if (offset != offsetof(struct ipv6hdr, payload_len) ||
		    hoplimit_word->payload_len ||
		    hoplimit_word->nexthdr)
			return true;
	}

	return false;
}
static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
					  struct flow_action *flow_action,
					  u32 actions,
					  struct netlink_ext_ack *extack)
{
	const struct flow_action_entry *act;
	bool modify_ip_header;
	void *headers_v;
	u16 ethertype;
	u8 ip_proto;
	int i;

	headers_v = get_match_headers_value(actions, spec);
	ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);

	/* for non-IP we only re-write MACs, so we're okay */
	if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
		goto out_ok;

	modify_ip_header = false;
	flow_action_for_each(i, act, flow_action) {
		if (act->id != FLOW_ACTION_MANGLE &&
		    act->id != FLOW_ACTION_ADD)
			continue;

		if (is_action_keys_supported(act)) {
			modify_ip_header = true;
			break;
		}
	}

	ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
	if (modify_ip_header && ip_proto != IPPROTO_TCP &&
	    ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
		NL_SET_ERR_MSG_MOD(extack,
				   "can't offload re-write of non TCP/UDP");
		pr_info("can't offload re-write of ip proto %d\n", ip_proto);
		return false;
	}

out_ok:
	return true;
}
static bool actions_match_supported(struct mlx5e_priv *priv,
				    struct flow_action *flow_action,
				    struct mlx5e_tc_flow_parse_attr *parse_attr,
				    struct mlx5e_tc_flow *flow,
				    struct netlink_ext_ack *extack)
{
	u32 actions;

	if (mlx5e_is_eswitch_flow(flow))
		actions = flow->esw_attr->action;
	else
		actions = flow->nic_attr->action;

	if (flow_flag_test(flow, EGRESS) &&
	    !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
	      (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
	      (actions & MLX5_FLOW_CONTEXT_ACTION_DROP)))
		return false;

	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		return modify_header_match_supported(&parse_attr->spec,
						     flow_action, actions,
						     extack);

	return true;
}
static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
{
	struct mlx5_core_dev *fmdev, *pmdev;
	u64 fsystem_guid, psystem_guid;

	fmdev = priv->mdev;
	pmdev = peer_priv->mdev;

	fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
	psystem_guid = mlx5_query_nic_system_image_guid(pmdev);

	return (fsystem_guid == psystem_guid);
}
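/* There is no dedicated "VLAN modify" device action here; it is emulated
 * as a pedit of the 16-bit TCI word (VID bits only). That is why the rule
 * must also match on the VLAN header and may not change the priority bits.
 */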
static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
				   const struct flow_action_entry *act,
				   struct mlx5e_tc_flow_parse_attr *parse_attr,
				   struct pedit_headers_action *hdrs,
				   u32 *action, struct netlink_ext_ack *extack)
{
	u16 mask16 = VLAN_VID_MASK;
	u16 val16 = act->vlan.vid & VLAN_VID_MASK;
	const struct flow_action_entry pedit_act = {
		.id = FLOW_ACTION_MANGLE,
		.mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
		.mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
		.mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
		.mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
	};
	u8 match_prio_mask, match_prio_val;
	void *headers_c, *headers_v;
	int err;

	headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
	headers_v = get_match_headers_value(*action, &parse_attr->spec);

	if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
	      MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
		NL_SET_ERR_MSG_MOD(extack,
				   "VLAN rewrite action must have VLAN protocol match");
		return -EOPNOTSUPP;
	}

	match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
	match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
	if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Changing VLAN prio is not supported");
		return -EOPNOTSUPP;
	}

	err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr,
				    hdrs, extack);
	*action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	return err;
}
static int
add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
				 struct mlx5e_tc_flow_parse_attr *parse_attr,
				 struct pedit_headers_action *hdrs,
				 u32 *action, struct netlink_ext_ack *extack)
{
	const struct flow_action_entry prio_tag_act = {
		.vlan.vid = 0,
		.vlan.prio =
			MLX5_GET(fte_match_set_lyr_2_4,
				 get_match_headers_value(*action,
							 &parse_attr->spec),
				 first_prio) &
			MLX5_GET(fte_match_set_lyr_2_4,
				 get_match_headers_criteria(*action,
							    &parse_attr->spec),
				 first_prio),
	};

	return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
				       &prio_tag_act, parse_attr, hdrs, action,
				       extack);
}
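/* NIC (non-eswitch) action parser. For example (illustrative):
 *   tc filter add dev $DEV ingress flower ip_proto tcp dst_port 80 \
 *       action drop
 * maps to MLX5_FLOW_CONTEXT_ACTION_DROP, plus a flow counter when the NIC
 * receive flow table advertises one.
 */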
static int parse_tc_nic_actions(struct mlx5e_priv *priv,
				struct flow_action *flow_action,
				struct mlx5e_tc_flow_parse_attr *parse_attr,
				struct mlx5e_tc_flow *flow,
				struct netlink_ext_ack *extack)
{
	struct mlx5_nic_flow_attr *attr = flow->nic_attr;
	struct pedit_headers_action hdrs[2] = {};
	const struct flow_action_entry *act;
	u32 action = 0;
	int err, i;

	if (!flow_action_has_entries(flow_action))
		return -EINVAL;

	attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_DROP:
			action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
			if (MLX5_CAP_FLOWTABLE(priv->mdev,
					       flow_table_properties_nic_receive.flow_counter))
				action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
			break;
		case FLOW_ACTION_MANGLE:
		case FLOW_ACTION_ADD:
			err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
						    parse_attr, hdrs, extack);
			if (err)
				return err;

			action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
				  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
			break;
		case FLOW_ACTION_VLAN_MANGLE:
			err = add_vlan_rewrite_action(priv,
						      MLX5_FLOW_NAMESPACE_KERNEL,
						      act, parse_attr, hdrs,
						      &action, extack);
			if (err)
				return err;

			break;
		case FLOW_ACTION_CSUM:
			if (csum_offload_supported(priv, action,
						   act->csum_flags,
						   extack))
				break;

			return -EOPNOTSUPP;
		case FLOW_ACTION_REDIRECT: {
			struct net_device *peer_dev = act->dev;

			if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
			    same_hw_devs(priv, netdev_priv(peer_dev))) {
				parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
				flow_flag_set(flow, HAIRPIN);
				action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
					  MLX5_FLOW_CONTEXT_ACTION_COUNT;
			} else {
				NL_SET_ERR_MSG_MOD(extack,
						   "device is not on same HW, can't offload");
				netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
					    peer_dev->name);
				return -EINVAL;
			}
			}
			break;
		case FLOW_ACTION_MARK: {
			u32 mark = act->mark;

			if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Bad flow mark - only 16 bit is supported");
				return -EINVAL;
			}

			attr->flow_tag = mark;
			action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
			}
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
			return -EOPNOTSUPP;
		}
	}

	if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
	    hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
		err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
					    parse_attr, hdrs, &action, extack);
		if (err)
			return err;
		/* in case all pedit actions are skipped, remove the MOD_HDR
		 * flag.
		 */
		if (parse_attr->num_mod_hdr_actions == 0) {
			action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
			kfree(parse_attr->mod_hdr_actions);
		}
	}

	attr->action = action;
	if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
		return -EOPNOTSUPP;

	return 0;
}
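/* Encap entries are shared between flows: a tunnel destination is keyed by
 * its ip_tunnel_key plus tunnel type and kept in esw->offloads.encap_tbl
 * under a jhash of that key, so many rules can reuse one neighbour/route
 * resolution.
 */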
struct encap_key {
	const struct ip_tunnel_key *ip_tun_key;
	struct mlx5e_tc_tunnel *tc_tunnel;
};

static inline int cmp_encap_info(struct encap_key *a,
				 struct encap_key *b)
{
	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
	       a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
}

static inline int hash_encap_info(struct encap_key *key)
{
	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
		     key->tc_tunnel->tunnel_type);
}
static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
				  struct net_device *peer_netdev)
{
	struct mlx5e_priv *peer_priv;

	peer_priv = netdev_priv(peer_netdev);

	return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
		mlx5e_eswitch_rep(priv->netdev) &&
		mlx5e_eswitch_rep(peer_netdev) &&
		same_hw_devs(priv, peer_priv));
}

bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}
static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_entry *e;
	struct encap_key e_key;

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		e_key.ip_tun_key = &e->tun_info->key;
		e_key.tc_tunnel = e->tunnel;
		if (!cmp_encap_info(&e_key, key) &&
		    mlx5e_encap_take(e))
			return e;
	}

	return NULL;
}

static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
{
	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;

	return kmemdup(tun_info, tun_size, GFP_KERNEL);
}
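/* Attach a flow to a (possibly shared) encap entry. Hash lookup and
 * insertion are serialized by encap_tbl_lock, while the neighbour/route
 * resolution runs outside the lock; the res_ready completion plus
 * compl_result let concurrent flows wait for the first resolver and then
 * either reuse its result or bail out.
 */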
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow,
			      struct net_device *mirred_dev,
			      int out_index,
			      struct netlink_ext_ack *extack,
			      struct net_device **encap_dev,
			      bool *encap_valid)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	const struct ip_tunnel_info *tun_info;
	struct encap_key key;
	struct mlx5e_encap_entry *e;
	unsigned short family;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);
	key.ip_tun_key = &tun_info->key;
	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
	if (!key.tc_tunnel) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(&key);

	mutex_lock(&esw->offloads.encap_tbl_lock);
	e = mlx5e_encap_get(priv, &key, hash_key);

	/* must verify if encap is valid or not */
	if (e) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		wait_for_completion(&e->res_ready);

		/* Protect against concurrent neigh update. */
		mutex_lock(&esw->offloads.encap_tbl_lock);
		if (e->compl_result < 0) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e) {
		err = -ENOMEM;
		goto out_err;
	}

	refcount_set(&e->refcnt, 1);
	init_completion(&e->res_ready);

	tun_info = dup_tun_info(tun_info);
	if (!tun_info) {
		err = -ENOMEM;
		goto out_err_init;
	}
	e->tun_info = tun_info;
	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
	if (err)
		goto out_err_init;

	INIT_LIST_HEAD(&e->flows);
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	if (family == AF_INET)
		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
	else if (family == AF_INET6)
		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

	/* Protect against concurrent neigh update. */
	mutex_lock(&esw->offloads.encap_tbl_lock);
	complete_all(&e->res_ready);
	if (err) {
		e->compl_result = err;
		goto out_err;
	}
	e->compl_result = 1;

attach_flow:
	flow->encaps[out_index].e = e;
	list_add(&flow->encaps[out_index].list, &e->flows);
	flow->encaps[out_index].index = out_index;
	*encap_dev = e->out_dev;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		attr->dests[out_index].pkt_reformat = e->pkt_reformat;
		attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
		*encap_valid = true;
	} else {
		*encap_valid = false;
	}
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	return err;

out_err:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	if (e)
		mlx5e_encap_put(priv, e);
	return err;

out_err_init:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	kfree(tun_info);
	kfree(e);
	return err;
}
static int parse_tc_vlan_action(struct mlx5e_priv *priv,
				const struct flow_action_entry *act,
				struct mlx5_esw_flow_attr *attr,
				u32 *action)
{
	u8 vlan_idx = attr->total_vlan;

	if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
		return -EOPNOTSUPP;

	switch (act->id) {
	case FLOW_ACTION_VLAN_POP:
		if (vlan_idx) {
			if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
								 MLX5_FS_VLAN_DEPTH))
				return -EOPNOTSUPP;

			*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
		} else {
			*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
		}
		break;
	case FLOW_ACTION_VLAN_PUSH:
		attr->vlan_vid[vlan_idx] = act->vlan.vid;
		attr->vlan_prio[vlan_idx] = act->vlan.prio;
		attr->vlan_proto[vlan_idx] = act->vlan.proto;
		if (!attr->vlan_proto[vlan_idx])
			attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);

		if (vlan_idx) {
			if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
								 MLX5_FS_VLAN_DEPTH))
				return -EOPNOTSUPP;

			*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
		} else {
			if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
			    (act->vlan.proto != htons(ETH_P_8021Q) ||
			     act->vlan.prio))
				return -EOPNOTSUPP;

			*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
		}
		break;
	default:
		return -EINVAL;
	}

	attr->total_vlan = vlan_idx + 1;

	return 0;
}
static int add_vlan_push_action(struct mlx5e_priv *priv,
				struct mlx5_esw_flow_attr *attr,
				struct net_device **out_dev,
				u32 *action)
{
	struct net_device *vlan_dev = *out_dev;
	struct flow_action_entry vlan_act = {
		.id = FLOW_ACTION_VLAN_PUSH,
		.vlan.vid = vlan_dev_vlan_id(vlan_dev),
		.vlan.proto = vlan_dev_vlan_proto(vlan_dev),
		.vlan.prio = 0,
	};
	int err;

	err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
	if (err)
		return err;

	*out_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
					dev_get_iflink(vlan_dev));
	if (is_vlan_dev(*out_dev))
		err = add_vlan_push_action(priv, attr, out_dev, action);

	return err;
}
static int add_vlan_pop_action(struct mlx5e_priv *priv,
			       struct mlx5_esw_flow_attr *attr,
			       u32 *action)
{
	struct flow_action_entry vlan_act = {
		.id = FLOW_ACTION_VLAN_POP,
	};
	int nest_level, err = 0;

	nest_level = attr->parse_attr->filter_dev->lower_level -
						priv->netdev->lower_level;
	while (nest_level--) {
		err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
		if (err)
			return err;
	}

	return err;
}
bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
				    struct net_device *out_dev)
{
	if (is_merged_eswitch_dev(priv, out_dev))
		return true;

	return mlx5e_eswitch_rep(out_dev) &&
	       same_hw_devs(priv, netdev_priv(out_dev));
}
*priv
,
3246 struct flow_action
*flow_action
,
3247 struct mlx5e_tc_flow
*flow
,
3248 struct netlink_ext_ack
*extack
)
3250 struct pedit_headers_action hdrs
[2] = {};
3251 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
3252 struct mlx5_esw_flow_attr
*attr
= flow
->esw_attr
;
3253 struct mlx5e_tc_flow_parse_attr
*parse_attr
= attr
->parse_attr
;
3254 struct mlx5e_rep_priv
*rpriv
= priv
->ppriv
;
3255 const struct ip_tunnel_info
*info
= NULL
;
3256 const struct flow_action_entry
*act
;
3261 if (!flow_action_has_entries(flow_action
))
3264 flow_action_for_each(i
, act
, flow_action
) {
3266 case FLOW_ACTION_DROP
:
3267 action
|= MLX5_FLOW_CONTEXT_ACTION_DROP
|
3268 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
3270 case FLOW_ACTION_MANGLE
:
3271 case FLOW_ACTION_ADD
:
3272 err
= parse_tc_pedit_action(priv
, act
, MLX5_FLOW_NAMESPACE_FDB
,
3273 parse_attr
, hdrs
, extack
);
3277 action
|= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
;
3278 attr
->split_count
= attr
->out_count
;
3280 case FLOW_ACTION_CSUM
:
3281 if (csum_offload_supported(priv
, action
,
3282 act
->csum_flags
, extack
))
3286 case FLOW_ACTION_REDIRECT
:
3287 case FLOW_ACTION_MIRRED
: {
3288 struct mlx5e_priv
*out_priv
;
3289 struct net_device
*out_dev
;
3293 /* out_dev is NULL when filters with
3294 * non-existing mirred device are replayed to
3300 if (attr
->out_count
>= MLX5_MAX_FLOW_FWD_VPORTS
) {
3301 NL_SET_ERR_MSG_MOD(extack
,
3302 "can't support more output ports, can't offload forwarding");
3303 pr_err("can't support more than %d output ports, can't offload forwarding\n",
3308 action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
|
3309 MLX5_FLOW_CONTEXT_ACTION_COUNT
;
3311 parse_attr
->mirred_ifindex
[attr
->out_count
] =
3313 parse_attr
->tun_info
[attr
->out_count
] = dup_tun_info(info
);
3314 if (!parse_attr
->tun_info
[attr
->out_count
])
3317 attr
->dests
[attr
->out_count
].flags
|=
3318 MLX5_ESW_DEST_ENCAP
;
3320 /* attr->dests[].rep is resolved when we
3323 } else if (netdev_port_same_parent_id(priv
->netdev
, out_dev
)) {
3324 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
3325 struct net_device
*uplink_dev
= mlx5_eswitch_uplink_get_proto_dev(esw
, REP_ETH
);
3326 struct net_device
*uplink_upper
;
3330 netdev_master_upper_dev_get_rcu(uplink_dev
);
3332 netif_is_lag_master(uplink_upper
) &&
3333 uplink_upper
== out_dev
)
3334 out_dev
= uplink_dev
;
3337 if (is_vlan_dev(out_dev
)) {
3338 err
= add_vlan_push_action(priv
, attr
,
3345 if (is_vlan_dev(parse_attr
->filter_dev
)) {
3346 err
= add_vlan_pop_action(priv
, attr
,
3352 if (!mlx5e_is_valid_eswitch_fwd_dev(priv
, out_dev
)) {
3353 NL_SET_ERR_MSG_MOD(extack
,
3354 "devices are not on same switch HW, can't offload forwarding");
3355 pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
3356 priv
->netdev
->name
, out_dev
->name
);
3360 out_priv
= netdev_priv(out_dev
);
3361 rpriv
= out_priv
->ppriv
;
3362 attr
->dests
[attr
->out_count
].rep
= rpriv
->rep
;
3363 attr
->dests
[attr
->out_count
].mdev
= out_priv
->mdev
;
3365 } else if (parse_attr
->filter_dev
!= priv
->netdev
) {
3366 /* All mlx5 devices are called to configure
3367 * high level device filters. Therefore, the
3368 * *attempt* to install a filter on invalid
3369 * eswitch should not trigger an explicit error
3373 NL_SET_ERR_MSG_MOD(extack
,
3374 "devices are not on same switch HW, can't offload forwarding");
3375 pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
3376 priv
->netdev
->name
, out_dev
->name
);
3381 case FLOW_ACTION_TUNNEL_ENCAP
:
3389 case FLOW_ACTION_VLAN_PUSH
:
3390 case FLOW_ACTION_VLAN_POP
:
3391 if (act
->id
== FLOW_ACTION_VLAN_PUSH
&&
3392 (action
& MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
)) {
3393 /* Replace vlan pop+push with vlan modify */
3394 action
&= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
;
3395 err
= add_vlan_rewrite_action(priv
,
3396 MLX5_FLOW_NAMESPACE_FDB
,
3397 act
, parse_attr
, hdrs
,
3400 err
= parse_tc_vlan_action(priv
, act
, attr
, &action
);
3405 attr
->split_count
= attr
->out_count
;
3407 case FLOW_ACTION_VLAN_MANGLE
:
3408 err
= add_vlan_rewrite_action(priv
,
3409 MLX5_FLOW_NAMESPACE_FDB
,
3410 act
, parse_attr
, hdrs
,
3415 attr
->split_count
= attr
->out_count
;
3417 case FLOW_ACTION_TUNNEL_DECAP
:
3418 action
|= MLX5_FLOW_CONTEXT_ACTION_DECAP
;
3420 case FLOW_ACTION_GOTO
: {
3421 u32 dest_chain
= act
->chain_index
;
3422 u32 max_chain
= mlx5_eswitch_get_chain_range(esw
);
3424 if (dest_chain
<= attr
->chain
) {
3425 NL_SET_ERR_MSG(extack
, "Goto earlier chain isn't supported");
3428 if (dest_chain
> max_chain
) {
3429 NL_SET_ERR_MSG(extack
, "Requested destination chain is out of supported range");
3432 action
|= MLX5_FLOW_CONTEXT_ACTION_COUNT
;
3433 attr
->dest_chain
= dest_chain
;
3437 NL_SET_ERR_MSG_MOD(extack
, "The offload action is not supported");
3442 if (MLX5_CAP_GEN(esw
->dev
, prio_tag_required
) &&
3443 action
& MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
) {
3444 /* For prio tag mode, replace vlan pop with rewrite vlan prio
3447 action
&= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
;
3448 err
= add_vlan_prio_tag_rewrite_action(priv
, parse_attr
, hdrs
,
3454 if (hdrs
[TCA_PEDIT_KEY_EX_CMD_SET
].pedits
||
3455 hdrs
[TCA_PEDIT_KEY_EX_CMD_ADD
].pedits
) {
3456 err
= alloc_tc_pedit_action(priv
, MLX5_FLOW_NAMESPACE_FDB
,
3457 parse_attr
, hdrs
, &action
, extack
);
3460 /* in case all pedit actions are skipped, remove the MOD_HDR
3461 * flag. we might have set split_count either by pedit or
3462 * pop/push. if there is no pop/push either, reset it too.
3464 if (parse_attr
->num_mod_hdr_actions
== 0) {
3465 action
&= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR
;
3466 kfree(parse_attr
->mod_hdr_actions
);
3467 if (!((action
& MLX5_FLOW_CONTEXT_ACTION_VLAN_POP
) ||
3468 (action
& MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH
)))
3469 attr
->split_count
= 0;
3473 attr
->action
= action
;
3474 if (!actions_match_supported(priv
, flow_action
, parse_attr
, flow
, extack
))
3477 if (attr
->dest_chain
) {
3478 if (attr
->action
& MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
) {
3479 NL_SET_ERR_MSG(extack
, "Mirroring goto chain rules isn't supported");
3482 attr
->action
|= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
;
3485 if (!(attr
->action
&
3486 (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST
| MLX5_FLOW_CONTEXT_ACTION_DROP
))) {
3487 NL_SET_ERR_MSG(extack
, "Rule must have at least one forward/drop action");
3491 if (attr
->split_count
> 0 && !mlx5_esw_has_fwd_fdb(priv
->mdev
)) {
3492 NL_SET_ERR_MSG_MOD(extack
,
3493 "current firmware doesn't support split rule for port mirroring");
3494 netdev_warn_once(priv
->netdev
, "current firmware doesn't support split rule for port mirroring\n");
static void get_flags(int flags, unsigned long *flow_flags)
{
	unsigned long __flow_flags = 0;

	if (flags & MLX5_TC_FLAG(INGRESS))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
	if (flags & MLX5_TC_FLAG(EGRESS))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);

	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
	if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);

	*flow_flags = __flow_flags;
}
= {
3519 .head_offset
= offsetof(struct mlx5e_tc_flow
, node
),
3520 .key_offset
= offsetof(struct mlx5e_tc_flow
, cookie
),
3521 .key_len
= sizeof(((struct mlx5e_tc_flow
*)0)->cookie
),
3522 .automatic_shrinking
= true,
3525 static struct rhashtable
*get_tc_ht(struct mlx5e_priv
*priv
,
3526 unsigned long flags
)
3528 struct mlx5_eswitch
*esw
= priv
->mdev
->priv
.eswitch
;
3529 struct mlx5e_rep_priv
*uplink_rpriv
;
3531 if (flags
& MLX5_TC_FLAG(ESW_OFFLOAD
)) {
3532 uplink_rpriv
= mlx5_eswitch_get_uplink_priv(esw
, REP_ETH
);
3533 return &uplink_rpriv
->uplink_priv
.tc_ht
;
3534 } else /* NIC offload */
3535 return &priv
->fs
.tc
.ht
;
static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
{
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
	bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
		flow_flag_test(flow, INGRESS);
	bool act_is_encap = !!(attr->action &
			       MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
	bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
						MLX5_DEVCOM_ESW_OFFLOADS);

	if (!esw_paired)
		return false;

	if ((mlx5_lag_is_sriov(attr->in_mdev) ||
	     mlx5_lag_is_multipath(attr->in_mdev)) &&
	    (is_rep_ingress || act_is_encap))
		return true;

	return false;
}
*priv
, int attr_size
,
3561 struct flow_cls_offload
*f
, unsigned long flow_flags
,
3562 struct mlx5e_tc_flow_parse_attr
**__parse_attr
,
3563 struct mlx5e_tc_flow
**__flow
)
3565 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
3566 struct mlx5e_tc_flow
*flow
;
3569 flow
= kzalloc(sizeof(*flow
) + attr_size
, GFP_KERNEL
);
3570 parse_attr
= kvzalloc(sizeof(*parse_attr
), GFP_KERNEL
);
3571 if (!parse_attr
|| !flow
) {
3576 flow
->cookie
= f
->cookie
;
3577 flow
->flags
= flow_flags
;
3579 for (out_index
= 0; out_index
< MLX5_MAX_FLOW_FWD_VPORTS
; out_index
++)
3580 INIT_LIST_HEAD(&flow
->encaps
[out_index
].list
);
3581 INIT_LIST_HEAD(&flow
->mod_hdr
);
3582 INIT_LIST_HEAD(&flow
->hairpin
);
3583 refcount_set(&flow
->refcnt
, 1);
3584 init_completion(&flow
->init_done
);
3587 *__parse_attr
= parse_attr
;
static void
mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
			 struct mlx5e_priv *priv,
			 struct mlx5e_tc_flow_parse_attr *parse_attr,
			 struct flow_cls_offload *f,
			 struct mlx5_eswitch_rep *in_rep,
			 struct mlx5_core_dev *in_mdev)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	esw_attr->parse_attr = parse_attr;
	esw_attr->chain = f->common.chain_index;
	esw_attr->prio = f->common.prio;

	esw_attr->in_rep = in_rep;
	esw_attr->in_mdev = in_mdev;

	if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
	    MLX5_COUNTER_SOURCE_ESWITCH)
		esw_attr->counter_dev = in_mdev;
	else
		esw_attr->counter_dev = priv->mdev;
}
*
3622 __mlx5e_add_fdb_flow(struct mlx5e_priv
*priv
,
3623 struct flow_cls_offload
*f
,
3624 unsigned long flow_flags
,
3625 struct net_device
*filter_dev
,
3626 struct mlx5_eswitch_rep
*in_rep
,
3627 struct mlx5_core_dev
*in_mdev
)
3629 struct flow_rule
*rule
= flow_cls_offload_flow_rule(f
);
3630 struct netlink_ext_ack
*extack
= f
->common
.extack
;
3631 struct mlx5e_tc_flow_parse_attr
*parse_attr
;
3632 struct mlx5e_tc_flow
*flow
;
3635 flow_flags
|= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH
);
3636 attr_size
= sizeof(struct mlx5_esw_flow_attr
);
3637 err
= mlx5e_alloc_flow(priv
, attr_size
, f
, flow_flags
,
3638 &parse_attr
, &flow
);
3642 parse_attr
->filter_dev
= filter_dev
;
3643 mlx5e_flow_esw_attr_init(flow
->esw_attr
,
3645 f
, in_rep
, in_mdev
);
3647 err
= parse_cls_flower(flow
->priv
, flow
, &parse_attr
->spec
,
3652 err
= parse_tc_fdb_actions(priv
, &rule
->action
, flow
, extack
);
3656 err
= mlx5e_tc_add_fdb_flow(priv
, flow
, extack
);
3657 complete_all(&flow
->init_done
);
3659 if (!(err
== -ENETUNREACH
&& mlx5_lag_is_multipath(in_mdev
)))
3662 add_unready_flow(flow
);
3668 mlx5e_flow_put(priv
, flow
);
3670 return ERR_PTR(err
);
static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
				      struct mlx5e_tc_flow *flow,
				      unsigned long flow_flags)
{
	struct mlx5e_priv *priv = flow->priv, *peer_priv;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_rep_priv *peer_urpriv;
	struct mlx5e_tc_flow *peer_flow;
	struct mlx5_core_dev *in_mdev;
	int err = 0;

	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		return -ENODEV;

	peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
	peer_priv = netdev_priv(peer_urpriv->netdev);

	/* in_mdev is assigned of which the packet originated from.
	 * So packets redirected to uplink use the same mdev of the
	 * original flow and packets redirected from uplink use the
	 * peer mdev.
	 */
	if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK)
		in_mdev = peer_priv->mdev;
	else
		in_mdev = priv->mdev;

	parse_attr = flow->esw_attr->parse_attr;
	peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
					 parse_attr->filter_dev,
					 flow->esw_attr->in_rep, in_mdev);
	if (IS_ERR(peer_flow)) {
		err = PTR_ERR(peer_flow);
		goto out;
	}

	flow->peer_flow = peer_flow;
	flow_flag_set(flow, DUP);
	mutex_lock(&esw->offloads.peer_mutex);
	list_add_tail(&flow->peer, &esw->offloads.peer_flows);
	mutex_unlock(&esw->offloads.peer_mutex);

out:
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	return err;
}
static int
mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
		   struct flow_cls_offload *f,
		   unsigned long flow_flags,
		   struct net_device *filter_dev,
		   struct mlx5e_tc_flow **__flow)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch_rep *in_rep = rpriv->rep;
	struct mlx5_core_dev *in_mdev = priv->mdev;
	struct mlx5e_tc_flow *flow;
	int err;

	flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
				    in_mdev);
	if (IS_ERR(flow))
		return PTR_ERR(flow);

	if (is_peer_flow_needed(flow)) {
		err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
		if (err) {
			mlx5e_tc_del_fdb_flow(priv, flow);
			goto out;
		}
	}

	*__flow = flow;

	return 0;

out:
	return err;
}
static int
mlx5e_add_nic_flow(struct mlx5e_priv *priv,
		   struct flow_cls_offload *f,
		   unsigned long flow_flags,
		   struct net_device *filter_dev,
		   struct mlx5e_tc_flow **__flow)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_tc_flow *flow;
	int attr_size, err;

	/* multi-chain not supported for NIC rules */
	if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
		return -EOPNOTSUPP;

	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
	attr_size  = sizeof(struct mlx5_nic_flow_attr);
	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
			       &parse_attr, &flow);
	if (err)
		goto out;

	parse_attr->filter_dev = filter_dev;
	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
			       f, filter_dev);
	if (err)
		goto err_free;

	err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
	if (err)
		goto err_free;

	err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
	if (err)
		goto err_free;

	flow_flag_set(flow, OFFLOADED);
	kvfree(parse_attr);
	*__flow = flow;

	return 0;

err_free:
	mlx5e_flow_put(priv, flow);
	kvfree(parse_attr);
out:
	return err;
}
static int
mlx5e_tc_add_flow(struct mlx5e_priv *priv,
		  struct flow_cls_offload *f,
		  unsigned long flags,
		  struct net_device *filter_dev,
		  struct mlx5e_tc_flow **flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long flow_flags;
	int err;

	get_flags(flags, &flow_flags);

	if (!tc_can_offload_extack(priv->netdev, f->common.extack))
		return -EOPNOTSUPP;

	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
		err = mlx5e_add_fdb_flow(priv, f, flow_flags,
					 filter_dev, flow);
	else
		err = mlx5e_add_nic_flow(priv, f, flow_flags,
					 filter_dev, flow);

	return err;
}
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
			   struct flow_cls_offload *f, unsigned long flags)
{
	struct netlink_ext_ack *extack = f->common.extack;
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5e_tc_flow *flow;
	int err = 0;

	rcu_read_lock();
	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
	rcu_read_unlock();
	if (flow) {
		NL_SET_ERR_MSG_MOD(extack,
				   "flow cookie already exists, ignoring");
		netdev_warn_once(priv->netdev,
				 "flow cookie %lx already exists, ignoring\n",
				 f->cookie);
		err = -EEXIST;
		goto out;
	}

	trace_mlx5e_configure_flower(f);
	err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
	if (err)
		goto out;

	err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
	if (err)
		goto err_free;

	return 0;

err_free:
	mlx5e_flow_put(priv, flow);
out:
	return err;
}
static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
{
	bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
	bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));

	return flow_flag_test(flow, INGRESS) == dir_ingress &&
		flow_flag_test(flow, EGRESS) == dir_egress;
}
int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
			struct flow_cls_offload *f, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5e_tc_flow *flow;
	int err;

	rcu_read_lock();
	flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
	if (!flow || !same_flow_direction(flow, flags)) {
		err = -EINVAL;
		goto errout;
	}

	/* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
	 * set.
	 */
	if (flow_flag_test_and_set(flow, DELETED)) {
		err = -EINVAL;
		goto errout;
	}
	rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
	rcu_read_unlock();

	trace_mlx5e_delete_flower(f);
	mlx5e_flow_put(priv, flow);

	return 0;

errout:
	rcu_read_unlock();
	return err;
}
int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
		       struct flow_cls_offload *f, unsigned long flags)
{
	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5_eswitch *peer_esw;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	u64 lastuse = 0;
	u64 packets = 0;
	u64 bytes = 0;
	int err = 0;

	rcu_read_lock();
	flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
						tc_ht_params));
	rcu_read_unlock();
	if (IS_ERR(flow))
		return PTR_ERR(flow);

	if (!same_flow_direction(flow, flags)) {
		err = -EINVAL;
		goto errout;
	}

	if (mlx5e_is_offloaded_flow(flow)) {
		counter = mlx5e_tc_get_counter(flow);
		if (!counter)
			goto errout;

		mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
	}

	/* Under multipath it's possible for one rule to be currently
	 * un-offloaded while the other rule is offloaded.
	 */
	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		goto out;

	if (flow_flag_test(flow, DUP) &&
	    flow_flag_test(flow->peer_flow, OFFLOADED)) {
		u64 bytes2;
		u64 packets2;
		u64 lastuse2;

		counter = mlx5e_tc_get_counter(flow->peer_flow);
		if (!counter)
			goto no_peer_counter;
		mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);

		bytes += bytes2;
		packets += packets2;
		lastuse = max_t(u64, lastuse, lastuse2);
	}

no_peer_counter:
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
out:
	flow_stats_update(&f->stats, bytes, packets, lastuse);
	trace_mlx5e_stats_flower(f);
errout:
	mlx5e_flow_put(priv, flow);
	return err;
}
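/* Matchall policing: the TC rate arrives in bytes/sec while the FW expects
 * whole megabits/sec. E.g. 937500 B/s = 7500000 bit/s; adding 500000
 * before the divide rounds to nearest, giving 8 Mbps, and any non-zero
 * rate is clamped to at least 1 Mbps.
 */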
static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
			       struct netlink_ext_ack *extack)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch *esw;
	u32 rate_mbps = 0;
	u16 vport_num;
	int err;

	vport_num = rpriv->rep->vport;
	if (vport_num >= MLX5_VPORT_ECPF) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Ingress rate limit is supported only for Eswitch ports connected to VFs");
		return -EOPNOTSUPP;
	}

	esw = priv->mdev->priv.eswitch;
	/* rate is given in bytes/sec.
	 * First convert to bits/sec and then round to the nearest mbit/secs.
	 * mbit means million bits.
	 * Moreover, if rate is non zero we choose to configure to a minimum of
	 * 1 mbit/sec.
	 */
	if (rate) {
		rate = (rate * BITS_PER_BYTE) + 500000;
		/* do_div() returns the remainder and leaves the quotient in
		 * 'rate', so the quotient (not the remainder) is what must be
		 * programmed.
		 */
		do_div(rate, 1000000);
		rate_mbps = max_t(u32, rate, 1);
	}

	err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");

	return err;
}
static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
					struct flow_action *flow_action,
					struct netlink_ext_ack *extack)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	const struct flow_action_entry *act;
	int err;
	int i;

	if (!flow_action_has_entries(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
		return -EINVAL;
	}

	if (!flow_offload_has_one_action(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
		return -EOPNOTSUPP;
	}

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_POLICE:
			err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
			if (err)
				return err;

			rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
			return -EOPNOTSUPP;
		}
	}

	return 0;
}
int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
				struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;

	if (ma->common.prio != 1) {
		NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
		return -EINVAL;
	}

	return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
}

int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;

	return apply_police_params(priv, 0, extack);
}
void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct rtnl_link_stats64 cur_stats;
	u64 dbytes;
	u64 dpkts;

	cur_stats = priv->stats.vf_vport;
	dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
	dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
	rpriv->prev_vf_vport_stats = cur_stats;
	flow_stats_update(&ma->stats, dpkts, dbytes, jiffies);
}
static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
					      struct mlx5e_priv *peer_priv)
{
	struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
	struct mlx5e_hairpin_entry *hpe, *tmp;
	LIST_HEAD(init_wait_list);
	u16 peer_vhca_id;
	int bkt;

	if (!same_hw_devs(priv, peer_priv))
		return;

	peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);

	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
	hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
		if (refcount_inc_not_zero(&hpe->refcnt))
			list_add(&hpe->dead_peer_wait_list, &init_wait_list);
	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

	list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
		wait_for_completion(&hpe->res_ready);
		if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
			mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);

		mlx5e_hairpin_put(priv, hpe);
	}
}
static int mlx5e_tc_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct mlx5e_flow_steering *fs;
	struct mlx5e_priv *peer_priv;
	struct mlx5e_tc_table *tc;
	struct mlx5e_priv *priv;

	if (ndev->netdev_ops != &mlx5e_netdev_ops ||
	    event != NETDEV_UNREGISTER ||
	    ndev->reg_state == NETREG_REGISTERED)
		return NOTIFY_DONE;

	tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
	fs = container_of(tc, struct mlx5e_flow_steering, tc);
	priv = container_of(fs, struct mlx5e_priv, fs);
	peer_priv = netdev_priv(ndev);
	if (priv == peer_priv ||
	    !(priv->netdev->features & NETIF_F_HW_TC))
		return NOTIFY_DONE;

	mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);

	return NOTIFY_DONE;
}
*priv
)
4147 struct mlx5e_tc_table
*tc
= &priv
->fs
.tc
;
4150 mutex_init(&tc
->t_lock
);
4151 mutex_init(&tc
->mod_hdr
.lock
);
4152 hash_init(tc
->mod_hdr
.hlist
);
4153 mutex_init(&tc
->hairpin_tbl_lock
);
4154 hash_init(tc
->hairpin_tbl
);
4156 err
= rhashtable_init(&tc
->ht
, &tc_ht_params
);
4160 tc
->netdevice_nb
.notifier_call
= mlx5e_tc_netdev_event
;
4161 if (register_netdevice_notifier(&tc
->netdevice_nb
)) {
4162 tc
->netdevice_nb
.notifier_call
= NULL
;
4163 mlx5_core_warn(priv
->mdev
, "Failed to register netdev notifier\n");
static void _mlx5e_tc_del_flow(void *ptr, void *arg)
{
	struct mlx5e_tc_flow *flow = ptr;
	struct mlx5e_priv *priv = flow->priv;

	mlx5e_tc_del_flow(priv, flow);
	kfree(flow);
}

void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	if (tc->netdevice_nb.notifier_call)
		unregister_netdevice_notifier(&tc->netdevice_nb);

	mutex_destroy(&tc->mod_hdr.lock);
	mutex_destroy(&tc->hairpin_tbl_lock);

	rhashtable_destroy(&tc->ht);

	if (!IS_ERR_OR_NULL(tc->t)) {
		mlx5_destroy_flow_table(tc->t);
		tc->t = NULL;
	}
	mutex_destroy(&tc->t_lock);
}
int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
{
	return rhashtable_init(tc_ht, &tc_ht_params);
}

void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
{
	rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
}

int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);

	return atomic_read(&tc_ht->nelems);
}
void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
{
	struct mlx5e_tc_flow *flow, *tmp;

	list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
		__mlx5e_tc_del_fdb_peer_flow(flow);
}

void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
{
	struct mlx5_rep_uplink_priv *rpriv =
		container_of(work, struct mlx5_rep_uplink_priv,
			     reoffload_flows_work);
	struct mlx5e_tc_flow *flow, *tmp;

	mutex_lock(&rpriv->unready_flows_lock);
	list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
		if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
			unready_flow_del(flow);
	}
	mutex_unlock(&rpriv->unready_flows_lock);
}