/*
 * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/prefetch.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/bpf_trace.h>
#include <net/busy_poll.h>
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"
#include "en_rep.h"
#include "ipoib/ipoib.h"

static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
{
        return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL;
}

static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
                                       void *data)
{
        u32 ci = cqcc & cq->wq.sz_m1;

        memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
}

static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
                                         struct mlx5e_cq *cq, u32 cqcc)
{
        mlx5e_read_cqe_slot(cq, cqcc, &cq->title);
        cq->decmprs_left        = be32_to_cpu(cq->title.byte_cnt);
        cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter);
        rq->stats.cqe_compress_blks++;
}

static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)
{
        mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr);
        cq->mini_arr_idx = 0;
}

static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
{
        u8 op_own = (cqcc >> cq->wq.log_sz) & 1;
        u32 wq_sz = 1 << cq->wq.log_sz;
        u32 ci = cqcc & cq->wq.sz_m1;
        u32 ci_top = min_t(u32, wq_sz, ci + n);

        for (; ci < ci_top; ci++, n--) {
                struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);

                cqe->op_own = op_own;
        }

        if (unlikely(ci == wq_sz)) {
                op_own = !op_own;
                for (ci = 0; ci < n; ci++) {
                        struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);

                        cqe->op_own = op_own;
                }
        }
}

static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
                                        struct mlx5e_cq *cq, u32 cqcc)
{
        cq->title.byte_cnt     = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
        cq->title.check_sum    = cq->mini_arr[cq->mini_arr_idx].checksum;
        cq->title.op_own      &= 0xf0;
        cq->title.op_own      |= 0x01 & (cqcc >> cq->wq.log_sz);
        cq->title.wqe_counter  = cpu_to_be16(cq->decmprs_wqe_counter);

        if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
                cq->decmprs_wqe_counter +=
                        mpwrq_get_cqe_consumed_strides(&cq->title);
        else
                cq->decmprs_wqe_counter =
                        (cq->decmprs_wqe_counter + 1) & rq->wq.sz_m1;
}

static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
                                                struct mlx5e_cq *cq, u32 cqcc)
{
        mlx5e_decompress_cqe(rq, cq, cqcc);
        cq->title.rss_hash_type   = 0;
        cq->title.rss_hash_result = 0;
}

static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
                                             struct mlx5e_cq *cq,
                                             int update_owner_only,
                                             int budget_rem)
{
        u32 cqcc = cq->wq.cc + update_owner_only;
        u32 cqe_count;
        u32 i;

        cqe_count = min_t(u32, cq->decmprs_left, budget_rem);

        for (i = update_owner_only; i < cqe_count;
             i++, cq->mini_arr_idx++, cqcc++) {
                if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
                        mlx5e_read_mini_arr_slot(cq, cqcc);

                mlx5e_decompress_cqe_no_hash(rq, cq, cqcc);
                rq->handle_rx_cqe(rq, &cq->title);
        }
        mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc);
        cq->wq.cc = cqcc;
        cq->decmprs_left -= cqe_count;
        rq->stats.cqe_compress_pkts += cqe_count;

        return cqe_count;
}

static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
                                              struct mlx5e_cq *cq,
                                              int budget_rem)
{
        mlx5e_read_title_slot(rq, cq, cq->wq.cc);
        mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1);
        mlx5e_decompress_cqe(rq, cq, cq->wq.cc);
        rq->handle_rx_cqe(rq, &cq->title);
        cq->mini_arr_idx++;

        return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1;
}
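
/* Note on CQE compression (the helpers above): with compression enabled
 * the HCA writes one full "title" CQE followed by slots packed with mini
 * CQEs carrying only byte_cnt/checksum; every other field is inherited
 * from the title. Because decompressed entries are materialized in place,
 * mlx5e_cqes_update_owner() must also rewrite the ownership bit by hand:
 * for a CQ of 2^log_sz entries the expected bit at consumer counter cqcc
 * is (cqcc >> log_sz) & 1, so e.g. with log_sz = 7 (128 CQEs), cqcc = 130
 * lands on slot 2 with owner bit 1, and the bit is inverted once the loop
 * wraps past the end of the ring.
 */
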
#define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT)

static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
                                      struct mlx5e_dma_info *dma_info)
{
        struct mlx5e_page_cache *cache = &rq->page_cache;
        u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);

        if (tail_next == cache->head) {
                rq->stats.cache_full++;
                return false;
        }

        if (unlikely(page_is_pfmemalloc(dma_info->page)))
                return false;

        cache->page_cache[cache->tail] = *dma_info;
        cache->tail = tail_next;
        return true;
}

static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
                                      struct mlx5e_dma_info *dma_info)
{
        struct mlx5e_page_cache *cache = &rq->page_cache;

        if (unlikely(cache->head == cache->tail)) {
                rq->stats.cache_empty++;
                return false;
        }

        if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
                rq->stats.cache_busy++;
                return false;
        }

        *dma_info = cache->page_cache[cache->head];
        cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
        rq->stats.cache_reuse++;

        dma_sync_single_for_device(rq->pdev, dma_info->addr,
                                   RQ_PAGE_SIZE(rq),
                                   DMA_FROM_DEVICE);
        return true;
}
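
/* Note: rq->page_cache is a small power-of-two ring (indexed with
 * "& (MLX5E_CACHE_SIZE - 1)"). head == tail means empty, and put() bails
 * out when advancing tail would run into head. A cached page is handed
 * out again only while page_ref_count() == 1, i.e. once the network stack
 * has dropped its reference; otherwise cache_busy is counted and the
 * caller falls through to a fresh allocation. pfmemalloc pages are never
 * cached, as those are reserved for memory-reclaim I/O.
 */
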
static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
                                          struct mlx5e_dma_info *dma_info)
{
        struct page *page;

        if (mlx5e_rx_cache_get(rq, dma_info))
                return 0;

        page = dev_alloc_pages(rq->buff.page_order);
        if (unlikely(!page))
                return -ENOMEM;

        dma_info->page = page;
        dma_info->addr = dma_map_page(rq->pdev, page, 0,
                                      RQ_PAGE_SIZE(rq), rq->buff.map_dir);
        if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
                put_page(page);
                return -ENOMEM;
        }

        return 0;
}

void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
                        bool recycle)
{
        if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info))
                return;

        dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq),
                       rq->buff.map_dir);
        put_page(dma_info->page);
}

int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
{
        struct mlx5e_dma_info *di = &rq->dma_info[ix];

        if (unlikely(mlx5e_page_alloc_mapped(rq, di)))
                return -ENOMEM;

        wqe->data.addr = cpu_to_be64(di->addr + rq->rx_headroom);
        return 0;
}

void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
{
        struct mlx5e_dma_info *di = &rq->dma_info[ix];

        mlx5e_page_release(rq, di, true);
}

static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
{
        return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
}

static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq,
                                            struct sk_buff *skb,
                                            struct mlx5e_mpw_info *wi,
                                            u32 page_idx, u32 frag_offset,
                                            u32 len)
{
        unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz);

        dma_sync_single_for_cpu(rq->pdev,
                                wi->umr.dma_info[page_idx].addr + frag_offset,
                                len, DMA_FROM_DEVICE);
        wi->skbs_frags[page_idx]++;
        skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
                        wi->umr.dma_info[page_idx].page, frag_offset,
                        len, truesize);
}

static inline void
mlx5e_copy_skb_header_mpwqe(struct device *pdev,
                            struct sk_buff *skb,
                            struct mlx5e_mpw_info *wi,
                            u32 page_idx, u32 offset,
                            u32 headlen)
{
        u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset);
        struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[page_idx];
        unsigned int len;

        /* Aligning len to sizeof(long) optimizes memcpy performance */
        len = ALIGN(headlen_pg, sizeof(long));
        dma_sync_single_for_cpu(pdev, dma_info->addr + offset, len,
                                DMA_FROM_DEVICE);
        skb_copy_to_linear_data_offset(skb, 0,
                                       page_address(dma_info->page) + offset,
                                       len);
        if (unlikely(offset + headlen > PAGE_SIZE)) {
                /* the header spills into the next page */
                dma_info++;
                headlen_pg = len;
                len = ALIGN(headlen - headlen_pg, sizeof(long));
                dma_sync_single_for_cpu(pdev, dma_info->addr, len,
                                        DMA_FROM_DEVICE);
                skb_copy_to_linear_data_offset(skb, headlen_pg,
                                               page_address(dma_info->page),
                                               len);
        }
}

static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
{
        struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
        struct mlx5e_icosq *sq = &rq->channel->icosq;
        struct mlx5_wq_cyc *wq = &sq->wq;
        struct mlx5e_umr_wqe *wqe;
        u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB);
        u16 pi;

        /* fill sq edge with nops to avoid wqe wrap around */
        while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
                sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
                sq->db.ico_wqe[pi].num_wqebbs = 1;
                mlx5e_post_nop(wq, sq->sqn, &sq->pc);
        }

        wqe = mlx5_wq_cyc_get_wqe(wq, pi);
        memcpy(wqe, &wi->umr.wqe, sizeof(*wqe));
        wqe->ctrl.opmod_idx_opcode =
                cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
                            MLX5_OPCODE_UMR);

        sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
        sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs;
        sq->pc += num_wqebbs;
        mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
}
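
/* Note: a UMR WQE occupies num_wqebbs consecutive WQEBBs and must not
 * wrap around the cyclic SQ, so the loop above pads the ring tail with
 * single-WQEBB NOPs until the producer index drops to or below sq->edge,
 * where a maximum-sized WQE is known to fit contiguously.
 */
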
static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq,
                                    struct mlx5e_rx_wqe *wqe,
                                    u16 ix)
{
        struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
        u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT;
        int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
        int err;
        int i;

        for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
                struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];

                err = mlx5e_page_alloc_mapped(rq, dma_info);
                if (unlikely(err))
                        goto err_unmap;
                wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
                page_ref_add(dma_info->page, pg_strides);
                wi->skbs_frags[i] = 0;
        }

        wi->consumed_strides = 0;
        wqe->data.addr = cpu_to_be64(dma_offset);

        return 0;

err_unmap:
        while (--i >= 0) {
                struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];

                page_ref_sub(dma_info->page, pg_strides);
                mlx5e_page_release(rq, dma_info, true);
        }

        return err;
}

void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
{
        int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
        int i;

        for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
                struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];

                page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]);
                mlx5e_page_release(rq, dma_info, true);
        }
}

void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
{
        struct mlx5_wq_ll *wq = &rq->wq;
        struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);

        clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);

        if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) {
                mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]);
                return;
        }

        mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));

        /* ensure wqes are visible to device before updating doorbell record */
        dma_wmb();

        mlx5_wq_ll_update_db_record(wq);
}

int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
{
        int err;

        err = mlx5e_alloc_rx_umr_mpwqe(rq, wqe, ix);
        if (unlikely(err))
                return err;
        set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
        mlx5e_post_umr_wqe(rq, ix);
        return -EBUSY;
}

void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
        struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];

        mlx5e_free_rx_mpwqe(rq, wi);
}

#define RQ_CANNOT_POST(rq) \
        (!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state) || \
         test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))

bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
{
        struct mlx5_wq_ll *wq = &rq->wq;

        if (unlikely(RQ_CANNOT_POST(rq)))
                return false;

        while (!mlx5_wq_ll_is_full(wq)) {
                struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
                int err;

                err = rq->alloc_wqe(rq, wqe, wq->head);
                if (err == -EBUSY)
                        return true;
                if (unlikely(err)) {
                        rq->stats.buff_alloc_err++;
                        break;
                }

                mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
        }

        /* ensure wqes are visible to device before updating doorbell record */
        dma_wmb();

        mlx5_wq_ll_update_db_record(wq);

        return !mlx5_wq_ll_is_full(wq);
}
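
/* Note: rq->alloc_wqe points at mlx5e_alloc_rx_wqe() or
 * mlx5e_alloc_rx_mpwqe(). The MPWQE variant returns -EBUSY after kicking
 * off an asynchronous UMR mapping, which is why the loop above treats
 * -EBUSY as "stop posting, but the RQ is fine": the WQE is published
 * later, presumably from the ICO SQ completion path via
 * mlx5e_post_rx_mpwqe().
 */
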
static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
                                 u32 cqe_bcnt)
{
        struct ethhdr   *eth = (struct ethhdr *)(skb->data);
        struct iphdr    *ipv4;
        struct ipv6hdr  *ipv6;
        struct tcphdr   *tcp;
        int network_depth = 0;
        __be16 proto;
        u16 tot_len;

        u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
        int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) ||
                       (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type));

        skb->mac_len = ETH_HLEN;
        proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);

        ipv4 = (struct iphdr *)(skb->data + network_depth);
        ipv6 = (struct ipv6hdr *)(skb->data + network_depth);
        tot_len = cqe_bcnt - network_depth;

        if (proto == htons(ETH_P_IP)) {
                tcp = (struct tcphdr *)(skb->data + network_depth +
                                        sizeof(struct iphdr));
                ipv6 = NULL;
                skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
        } else {
                tcp = (struct tcphdr *)(skb->data + network_depth +
                                        sizeof(struct ipv6hdr));
                ipv4 = NULL;
                skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
        }

        if (get_cqe_lro_tcppsh(cqe))
                tcp->psh = 1;

        if (tcp_ack) {
                tcp->ack     = 1;
                tcp->ack_seq = cqe->lro_ack_seq_num;
                tcp->window  = cqe->lro_tcp_win;
        }

        if (ipv4) {
                ipv4->ttl     = cqe->lro_min_ttl;
                ipv4->tot_len = cpu_to_be16(tot_len);
                ipv4->check   = 0;
                ipv4->check   = ip_fast_csum((unsigned char *)ipv4,
                                             ipv4->ihl);
        } else {
                ipv6->hop_limit   = cqe->lro_min_ttl;
                ipv6->payload_len = cpu_to_be16(tot_len -
                                                sizeof(struct ipv6hdr));
        }
}
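
/* Note: for an LRO aggregate the adapter delivers one coalesced frame
 * whose headers still describe the first segment, so the fixup above
 * rewrites tot_len/payload_len from the aggregated cqe_bcnt, refreshes
 * TTL/hop_limit, PSH/ACK/window/ack_seq from the CQE, and recomputes the
 * IPv4 header checksum (IPv6 carries no header checksum).
 */
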
static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
                                      struct sk_buff *skb)
{
        u8 cht = cqe->rss_hash_type;
        int ht = (cht & CQE_RSS_HTYPE_L4) ? PKT_HASH_TYPE_L4 :
                 (cht & CQE_RSS_HTYPE_IP) ? PKT_HASH_TYPE_L3 :
                                            PKT_HASH_TYPE_NONE;
        skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
}

static inline bool is_first_ethertype_ip(struct sk_buff *skb)
{
        __be16 ethertype = ((struct ethhdr *)skb->data)->h_proto;

        return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6));
}

static inline void mlx5e_handle_csum(struct net_device *netdev,
                                     struct mlx5_cqe64 *cqe,
                                     struct mlx5e_rq *rq,
                                     struct sk_buff *skb,
                                     bool   lro)
{
        if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
                goto csum_none;

        if (lro) {
                skb->ip_summed = CHECKSUM_UNNECESSARY;
                return;
        }

        if (is_first_ethertype_ip(skb)) {
                skb->ip_summed = CHECKSUM_COMPLETE;
                skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
                rq->stats.csum_complete++;
                return;
        }

        if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
                   (cqe->hds_ip_ext & CQE_L4_OK))) {
                skb->ip_summed = CHECKSUM_UNNECESSARY;
                if (cqe_is_tunneled(cqe)) {
                        skb->csum_level = 1;
                        skb->encapsulation = 1;
                        rq->stats.csum_unnecessary_inner++;
                }
                return;
        }
csum_none:
        skb->ip_summed = CHECKSUM_NONE;
        rq->stats.csum_none++;
}
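
/* Note: CHECKSUM_COMPLETE is preferred here since cqe->check_sum is a
 * ones-complement sum the stack can fold into any inner protocol check.
 * It is only used when the outermost ethertype is plain IPv4/IPv6; for
 * anything else (e.g. a VLAN tag stripped by the hardware) the sum may
 * not line up with what the stack expects, so the code falls back to the
 * L3_OK/L4_OK validity bits or to CHECKSUM_NONE.
 */
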
static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
                                      u32 cqe_bcnt,
                                      struct mlx5e_rq *rq,
                                      struct sk_buff *skb)
{
        struct net_device *netdev = rq->netdev;
        struct mlx5e_tstamp *tstamp = rq->tstamp;
        int lro_num_seg;

        lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
        if (lro_num_seg > 1) {
                mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
                skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
                /* Subtract one since we already counted this as one
                 * "regular" packet in mlx5e_complete_rx_cqe()
                 */
                rq->stats.packets += lro_num_seg - 1;
                rq->stats.lro_packets++;
                rq->stats.lro_bytes += cqe_bcnt;
        }

        if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
                mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb));

        skb_record_rx_queue(skb, rq->ix);

        if (likely(netdev->features & NETIF_F_RXHASH))
                mlx5e_skb_set_hash(cqe, skb);

        if (cqe_has_vlan(cqe))
                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
                                       be16_to_cpu(cqe->vlan_info));

        skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;

        mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg);
        skb->protocol = eth_type_trans(skb, netdev);
}

static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
                                         struct mlx5_cqe64 *cqe,
                                         u32 cqe_bcnt,
                                         struct sk_buff *skb)
{
        rq->stats.packets++;
        rq->stats.bytes += cqe_bcnt;
        mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
}

static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
{
        struct mlx5_wq_cyc *wq = &sq->wq;
        struct mlx5e_tx_wqe *wqe;
        u16 pi = (sq->pc - 1) & wq->sz_m1; /* last pi */

        wqe = mlx5_wq_cyc_get_wqe(wq, pi);

        mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl);
}

static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
                                        struct mlx5e_dma_info *di,
                                        const struct xdp_buff *xdp)
{
        struct mlx5e_xdpsq       *sq   = &rq->xdpsq;
        struct mlx5_wq_cyc       *wq   = &sq->wq;
        u16                       pi   = sq->pc & wq->sz_m1;
        struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);

        struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
        struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
        struct mlx5_wqe_data_seg *dseg;

        ptrdiff_t data_offset = xdp->data - xdp->data_hard_start;
        dma_addr_t dma_addr  = di->addr + data_offset;
        unsigned int dma_len = xdp->data_end - xdp->data;

        prefetchw(wqe);

        if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE ||
                     MLX5E_SW2HW_MTU(rq->netdev->mtu) < dma_len)) {
                rq->stats.xdp_drop++;
                mlx5e_page_release(rq, di, true);
                return false;
        }

        if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
                if (sq->db.doorbell) {
                        /* SQ is full, ring doorbell */
                        mlx5e_xmit_xdp_doorbell(sq);
                        sq->db.doorbell = false;
                }
                rq->stats.xdp_tx_full++;
                mlx5e_page_release(rq, di, true);
                return false;
        }

        dma_sync_single_for_device(sq->pdev, dma_addr, dma_len,
                                   PCI_DMA_TODEVICE);

        memset(wqe, 0, sizeof(*wqe));

        dseg = (struct mlx5_wqe_data_seg *)eseg + 1;
        /* copy the inline part if required */
        if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
                memcpy(eseg->inline_hdr.start, xdp->data, MLX5E_XDP_MIN_INLINE);
                eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
                dma_len  -= MLX5E_XDP_MIN_INLINE;
                dma_addr += MLX5E_XDP_MIN_INLINE;
                dseg++;
        }

        /* write the dma part */
        dseg->addr       = cpu_to_be64(dma_addr);
        dseg->byte_count = cpu_to_be32(dma_len);

        cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);

        /* the page is released back to the RQ on xdp sq completion */
        sq->db.di[pi] = *di;
        sq->pc++;

        sq->db.doorbell = true;
        rq->stats.xdp_tx++;
        return true;
}
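
/* Note: XDP_TX reuses the RX page as the transmit buffer, sending from
 * di->addr plus whatever offset the BPF program left in xdp->data. The
 * first MLX5E_XDP_MIN_INLINE bytes are copied into the WQE's inline
 * header when the SQ's min-inline mode asks for packet-start bytes in
 * the descriptor. Doorbells are batched via sq->db.doorbell and rung
 * once per NAPI poll, or early when the SQ fills.
 */
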
/* returns true if packet was consumed by xdp */
static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
                                   struct mlx5e_dma_info *di,
                                   void *va, u16 *rx_headroom, u32 *len)
{
        const struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
        struct xdp_buff xdp;
        u32 act;

        if (!prog)
                return false;

        xdp.data = va + *rx_headroom;
        xdp.data_end = xdp.data + *len;
        xdp.data_hard_start = va;

        act = bpf_prog_run_xdp(prog, &xdp);
        switch (act) {
        case XDP_PASS:
                *rx_headroom = xdp.data - xdp.data_hard_start;
                *len = xdp.data_end - xdp.data;
                return false;
        case XDP_TX:
                if (unlikely(!mlx5e_xmit_xdp_frame(rq, di, &xdp)))
                        trace_xdp_exception(rq->netdev, prog, act);
                return true;
        default:
                bpf_warn_invalid_xdp_action(act);
        case XDP_ABORTED:
                trace_xdp_exception(rq->netdev, prog, act);
        case XDP_DROP:
                rq->stats.xdp_drop++;
                mlx5e_page_release(rq, di, true);
                return true;
        }
}
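
/* Note: on XDP_PASS the program may have moved xdp.data (e.g. with
 * bpf_xdp_adjust_head()), so headroom and length are written back before
 * the skb is built. The switch deliberately lets "default" fall through
 * XDP_ABORTED into XDP_DROP, so unknown verdicts are warned, traced and
 * dropped with the page recycled.
 */
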
static inline
struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
                             u16 wqe_counter, u32 cqe_bcnt)
{
        struct mlx5e_dma_info *di;
        struct sk_buff *skb;
        void *va, *data;
        u16 rx_headroom = rq->rx_headroom;
        bool consumed;

        di             = &rq->dma_info[wqe_counter];
        va             = page_address(di->page);
        data           = va + rx_headroom;

        dma_sync_single_range_for_cpu(rq->pdev,
                                      di->addr,
                                      rx_headroom,
                                      rq->buff.wqe_sz,
                                      DMA_FROM_DEVICE);
        prefetch(data);

        if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
                rq->stats.wqe_err++;
                mlx5e_page_release(rq, di, true);
                return NULL;
        }

        rcu_read_lock();
        consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt);
        rcu_read_unlock();
        if (consumed)
                return NULL; /* page/packet was consumed by XDP */

        skb = build_skb(va, RQ_PAGE_SIZE(rq));
        if (unlikely(!skb)) {
                rq->stats.buff_alloc_err++;
                mlx5e_page_release(rq, di, true);
                return NULL;
        }

        /* queue up for recycling ..*/
        page_ref_inc(di->page);
        mlx5e_page_release(rq, di, true);

        skb_reserve(skb, rx_headroom);
        skb_put(skb, cqe_bcnt);

        return skb;
}
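
/* Note: the page_ref_inc() + mlx5e_page_release(..., true) pair above is
 * deliberate: the extra reference belongs to the skb just built on top of
 * the page, while the release parks the original reference (page plus DMA
 * mapping) in the page cache. mlx5e_rx_cache_get() will only hand the
 * page out again once the skb is freed and the refcount drops back to 1.
 */
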
void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
        struct mlx5e_rx_wqe *wqe;
        __be16 wqe_counter_be;
        struct sk_buff *skb;
        u16 wqe_counter;
        u32 cqe_bcnt;

        wqe_counter_be = cqe->wqe_counter;
        wqe_counter    = be16_to_cpu(wqe_counter_be);
        wqe            = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
        cqe_bcnt       = be32_to_cpu(cqe->byte_cnt);

        skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt);
        if (!skb)
                goto wq_ll_pop;

        mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
        napi_gro_receive(rq->cq.napi, skb);

wq_ll_pop:
        mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
                       &wqe->next.next_wqe_index);
}

void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
        struct net_device *netdev = rq->netdev;
        struct mlx5e_priv *priv = netdev_priv(netdev);
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
        struct mlx5_eswitch_rep *rep = rpriv->rep;
        struct mlx5e_rx_wqe *wqe;
        struct sk_buff *skb;
        __be16 wqe_counter_be;
        u16 wqe_counter;
        u32 cqe_bcnt;

        wqe_counter_be = cqe->wqe_counter;
        wqe_counter    = be16_to_cpu(wqe_counter_be);
        wqe            = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
        cqe_bcnt       = be32_to_cpu(cqe->byte_cnt);

        skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt);
        if (!skb)
                goto wq_ll_pop;

        mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);

        if (rep->vlan && skb_vlan_tag_present(skb))
                skb_vlan_pop(skb);

        napi_gro_receive(rq->cq.napi, skb);

wq_ll_pop:
        mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
                       &wqe->next.next_wqe_index);
}

static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
                                           struct mlx5_cqe64 *cqe,
                                           struct mlx5e_mpw_info *wi,
                                           u32 cqe_bcnt,
                                           struct sk_buff *skb)
{
        u16 stride_ix      = mpwrq_get_cqe_stride_index(cqe);
        u32 wqe_offset     = stride_ix * rq->mpwqe_stride_sz;
        u32 head_offset    = wqe_offset & (PAGE_SIZE - 1);
        u32 page_idx       = wqe_offset >> PAGE_SHIFT;
        u32 head_page_idx  = page_idx;
        u16 headlen = min_t(u16, MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, cqe_bcnt);
        u32 frag_offset    = head_offset + headlen;
        u16 byte_cnt       = cqe_bcnt - headlen;

        if (unlikely(frag_offset >= PAGE_SIZE)) {
                page_idx++;
                frag_offset -= PAGE_SIZE;
        }

        while (byte_cnt) {
                u32 pg_consumed_bytes =
                        min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);

                mlx5e_add_skb_frag_mpwqe(rq, skb, wi, page_idx, frag_offset,
                                         pg_consumed_bytes);
                byte_cnt -= pg_consumed_bytes;
                frag_offset = 0;
                page_idx++;
        }
        /* copy header */
        mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, wi, head_page_idx,
                                    head_offset, headlen);
        /* skb linear part was allocated with headlen and aligned to long */
        skb->tail += headlen;
        skb->len  += headlen;
}
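
/* Note: in striding-RQ mode a packet begins at an arbitrary stride of a
 * multi-page WQE. Up to MLX5_MPWRQ_SMALL_PACKET_THRESHOLD header bytes
 * are memcpy'd into the skb linear area (cheap for small packets, and it
 * keeps headers where the stack expects them), while the rest is attached
 * page by page as frags with truesize rounded up to whole strides.
 */
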
void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
        u16 cstrides       = mpwrq_get_cqe_consumed_strides(cqe);
        u16 wqe_id         = be16_to_cpu(cqe->wqe_id);
        struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id];
        struct mlx5e_rx_wqe  *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id);
        struct sk_buff *skb;
        u16 cqe_bcnt;

        wi->consumed_strides += cstrides;

        if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
                rq->stats.wqe_err++;
                goto mpwrq_cqe_out;
        }

        if (unlikely(mpwrq_is_filler_cqe(cqe))) {
                rq->stats.mpwqe_filler++;
                goto mpwrq_cqe_out;
        }

        skb = napi_alloc_skb(rq->cq.napi,
                             ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD,
                                   sizeof(long)));
        if (unlikely(!skb)) {
                rq->stats.buff_alloc_err++;
                goto mpwrq_cqe_out;
        }

        prefetchw(skb->data);
        cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);

        mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb);
        mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
        napi_gro_receive(rq->cq.napi, skb);

mpwrq_cqe_out:
        if (likely(wi->consumed_strides < rq->mpwqe_num_strides))
                return;

        mlx5e_free_rx_mpwqe(rq, wi);
        mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index);
}

int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
{
        struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
        struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
        int work_done = 0;

        if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
                return 0;

        if (cq->decmprs_left)
                work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);

        for (; work_done < budget; work_done++) {
                struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq);

                if (!cqe)
                        break;

                if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
                        work_done +=
                                mlx5e_decompress_cqes_start(rq, cq,
                                                            budget - work_done);
                        continue;
                }

                mlx5_cqwq_pop(&cq->wq);

                rq->handle_rx_cqe(rq, cqe);
        }

        if (xdpsq->db.doorbell) {
                mlx5e_xmit_xdp_doorbell(xdpsq);
                xdpsq->db.doorbell = false;
        }

        mlx5_cqwq_update_db_record(&cq->wq);

        /* ensure cq space is freed before enabling more cqes */
        wmb();

        return work_done;
}

bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
        struct mlx5e_xdpsq *sq;
        struct mlx5e_rq *rq;
        u16 sqcc;
        int i;

        sq = container_of(cq, struct mlx5e_xdpsq, cq);

        if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
                return false;

        rq = container_of(sq, struct mlx5e_rq, xdpsq);

        /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
         * otherwise a cq overrun may occur
         */
        sqcc = sq->cc;

        for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) {
                struct mlx5_cqe64 *cqe;
                u16 wqe_counter;
                bool last_wqe;

                cqe = mlx5e_get_cqe(cq);
                if (!cqe)
                        break;

                mlx5_cqwq_pop(&cq->wq);

                wqe_counter = be16_to_cpu(cqe->wqe_counter);

                do {
                        struct mlx5e_dma_info *di;
                        u16 ci;

                        last_wqe = (sqcc == wqe_counter);

                        ci = sqcc & sq->wq.sz_m1;
                        di = &sq->db.di[ci];

                        sqcc++;
                        /* Recycle RX page */
                        mlx5e_page_release(rq, di, true);
                } while (!last_wqe);
        }

        mlx5_cqwq_update_db_record(&cq->wq);

        /* ensure cq space is freed before enabling more cqes */
        wmb();

        sq->cc = sqcc;
        return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
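
/* Note: completions are drained into the local sqcc and sq->cc is
 * published only after mlx5_cqwq_update_db_record() and the wmb(); doing
 * it earlier could let the producer reuse CQ space the hardware still
 * owns, i.e. the CQ overrun the comment in the function warns about.
 */
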
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
{
        struct mlx5e_rq *rq = container_of(sq, struct mlx5e_rq, xdpsq);
        struct mlx5e_dma_info *di;
        u16 ci;

        while (sq->cc != sq->pc) {
                ci = sq->cc & sq->wq.sz_m1;
                di = &sq->db.di[ci];
                sq->cc++;

                mlx5e_page_release(rq, di, false);
        }
}

#ifdef CONFIG_MLX5_CORE_IPOIB

#define MLX5_IB_GRH_DGID_OFFSET 24
#define MLX5_IB_GRH_BYTES       40
#define MLX5_IPOIB_ENCAP_LEN    4
#define MLX5_GID_SIZE           16
#define MLX5_IPOIB_PSEUDO_LEN   20
#define MLX5_IPOIB_HARD_LEN     (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN)

static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
                                         struct mlx5_cqe64 *cqe,
                                         u32 cqe_bcnt,
                                         struct sk_buff *skb)
{
        struct net_device *netdev = rq->netdev;
        char *pseudo_header;
        u8 *dgid;
        u8 g;

        g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
        dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET;
        if ((!g) || dgid[0] != 0xff)
                skb->pkt_type = PACKET_HOST;
        else if (memcmp(dgid, netdev->broadcast + 4, MLX5_GID_SIZE) == 0)
                skb->pkt_type = PACKET_BROADCAST;
        else
                skb->pkt_type = PACKET_MULTICAST;

        /* TODO: IB/ipoib: Allow mcast packets from other VFs
         * 68996a6e760e5c74654723eeb57bf65628ae87f4
         */

        skb_pull(skb, MLX5_IB_GRH_BYTES);

        skb->protocol = *((__be16 *)(skb->data));

        skb->ip_summed = CHECKSUM_COMPLETE;
        skb->csum = csum_unfold((__force __sum16)cqe->check_sum);

        skb_record_rx_queue(skb, rq->ix);

        if (likely(netdev->features & NETIF_F_RXHASH))
                mlx5e_skb_set_hash(cqe, skb);

        /* 20 bytes of ipoib header and 4 for encap existing */
        pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN);
        memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN);
        skb_reset_mac_header(skb);
        skb_pull(skb, MLX5_IPOIB_HARD_LEN);

        skb->dev = netdev;

        rq->stats.csum_complete++;
        rq->stats.packets++;
        rq->stats.bytes += cqe_bcnt;
}

void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
        struct mlx5e_rx_wqe *wqe;
        __be16 wqe_counter_be;
        struct sk_buff *skb;
        u16 wqe_counter;
        u32 cqe_bcnt;

        wqe_counter_be = cqe->wqe_counter;
        wqe_counter    = be16_to_cpu(wqe_counter_be);
        wqe            = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
        cqe_bcnt       = be32_to_cpu(cqe->byte_cnt);

        skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt);
        if (!skb)
                goto wq_ll_pop;

        mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
        napi_gro_receive(rq->cq.napi, skb);

wq_ll_pop:
        mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
                       &wqe->next.next_wqe_index);
}

#endif /* CONFIG_MLX5_CORE_IPOIB */