// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/etherdevice.h>
static void gve_rx_free_buffer(struct device *dev,
			       struct gve_rx_slot_page_info *page_info,
			       union gve_rx_data_slot *data_slot)
{
	dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
				      GVE_DATA_SLOT_ADDR_PAGE_MASK);

	gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}
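
/* Release every posted Rx buffer. In raw addressing mode each page was
 * allocated by this driver, so it is unmapped and freed; in QPL mode the
 * pages belong to the registered queue page list, which is handed back
 * to the pool instead.
 */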
static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	if (rx->data.raw_addressing) {
		u32 slots = rx->mask + 1;
		int i;

		for (i = 0; i < slots; i++)
			gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
					   &rx->data.data_ring[i]);
	} else {
		gve_unassign_qpl(priv, rx->data.qpl->id);
		rx->data.qpl = NULL;
	}
	kvfree(rx->data.page_info);
	rx->data.page_info = NULL;
}
static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *dev = &priv->pdev->dev;
	u32 slots = rx->mask + 1;
	size_t bytes;

	gve_rx_remove_from_block(priv, idx);

	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
	rx->desc.desc_ring = NULL;

	dma_free_coherent(dev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;

	gve_rx_unfill_pages(priv, rx);

	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(dev, bytes, rx->data.data_ring,
			  rx->data.data_bus);
	rx->data.data_ring = NULL;
	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}
static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
				dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
	page_info->page = page;
	page_info->page_offset = 0;
	page_info->page_address = page_address(page);
	*slot_addr = cpu_to_be64(addr);
}
static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
			       struct gve_rx_slot_page_info *page_info,
			       union gve_rx_data_slot *data_slot)
{
	struct page *page;
	dma_addr_t dma;
	int err;

	err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
			     GFP_ATOMIC);
	if (err)
		return err;

	gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
	return 0;
}
static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->gve;
	u32 slots;
	int err;
	int i;

	/* Allocate one page per Rx queue slot. Each page is split into two
	 * packet buffers, when possible we "page flip" between the two.
	 */
	slots = rx->mask + 1;

	rx->data.page_info = kvzalloc(slots *
				      sizeof(*rx->data.page_info), GFP_KERNEL);
	if (!rx->data.page_info)
		return -ENOMEM;

	if (!rx->data.raw_addressing) {
		rx->data.qpl = gve_assign_rx_qpl(priv);
		if (!rx->data.qpl) {
			kvfree(rx->data.page_info);
			rx->data.page_info = NULL;
			return -ENOMEM;
		}
	}
	for (i = 0; i < slots; i++) {
		if (!rx->data.raw_addressing) {
			struct page *page = rx->data.qpl->pages[i];
			dma_addr_t addr = i * PAGE_SIZE;

			gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
					    &rx->data.data_ring[i].qpl_offset);
			continue;
		}
		err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
					  &rx->data.data_ring[i]);
		if (err)
			goto alloc_err;
	}

	return slots;
alloc_err:
	while (i--)
		gve_rx_free_buffer(&priv->pdev->dev,
				   &rx->data.page_info[i],
				   &rx->data.data_ring[i]);
	return err;
}
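
/* Allocate one Rx ring: the data slot ring, the prefilled packet buffers,
 * the queue resources block and the descriptor ring, in that order. On
 * failure the abort labels below unwind whatever was already allocated.
 */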
static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots, npages;
	int filled_pages;
	size_t bytes;
	int err;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
	/* Make sure everything is zeroed to start with */
	memset(rx, 0, sizeof(*rx));

	rx->gve = priv;
	rx->q_num = idx;

	slots = priv->rx_data_slot_cnt;
	rx->mask = slots - 1;
	rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;

	/* alloc rx data ring */
	bytes = sizeof(*rx->data.data_ring) * slots;
	rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
						&rx->data.data_bus,
						GFP_KERNEL);
	if (!rx->data.data_ring)
		return -ENOMEM;
	filled_pages = gve_prefill_rx_pages(rx);
	if (filled_pages < 0) {
		err = -ENOMEM;
		goto abort_with_slots;
	}
	rx->fill_cnt = filled_pages;
	/* Ensure data ring slots (packet buffers) are visible. */
	dma_wmb();

	/* Alloc gve_queue_resources */
	rx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*rx->q_resources),
				   &rx->q_resources_bus,
				   GFP_KERNEL);
	if (!rx->q_resources) {
		err = -ENOMEM;
		goto abort_filled;
	}
	netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
		  (unsigned long)rx->data.data_bus);

	/* alloc rx desc ring */
	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	npages = bytes / PAGE_SIZE;
	if (npages * PAGE_SIZE != bytes) {
		err = -EIO;
		goto abort_with_q_resources;
	}

	rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
						GFP_KERNEL);
	if (!rx->desc.desc_ring) {
		err = -ENOMEM;
		goto abort_with_q_resources;
	}
	rx->cnt = 0;
	rx->db_threshold = priv->rx_desc_cnt / 2;
	rx->desc.seqno = 1;
	gve_rx_add_to_block(priv, idx);

	return 0;

abort_with_q_resources:
	dma_free_coherent(hdev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;
abort_filled:
	gve_rx_unfill_pages(priv, rx);
abort_with_slots:
	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
	rx->data.data_ring = NULL;

	return err;
}
int gve_rx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Unallocate if there was an error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_rx_free_ring(priv, j);
	}
	return err;
}
void gve_rx_free_rings_gqi(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring(priv, i);
}
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

	iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}
static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
	if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
		return PKT_HASH_TYPE_L4;
	if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
		return PKT_HASH_TYPE_L3;
	return PKT_HASH_TYPE_L2;
}
static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
					struct gve_rx_slot_page_info *page_info,
					u16 len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (unlikely(!skb))
		return NULL;

	skb_add_rx_frag(skb, 0, page_info->page,
			page_info->page_offset +
			GVE_RX_PAD, len, PAGE_SIZE / 2);

	return skb;
}
static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
	const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);

	/* "flip" to other packet buffer on this page */
	page_info->page_offset ^= PAGE_SIZE / 2;
	*(slot_addr) ^= offset;
}
static bool gve_rx_can_flip_buffers(struct net_device *netdev)
{
	return PAGE_SIZE == 4096
		? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false;
}
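
/* Returns 1 if the page is only held by the driver and can be reused,
 * 0 if an SKB still holds a reference, or -1 on an unexpected refcount.
 */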
static int gve_rx_can_recycle_buffer(struct page *page)
{
	int pagecount = page_count(page);

	/* This page is not being used by any SKBs - reuse */
	if (pagecount == 1)
		return 1;
	/* This page is still being used by an SKB - we can't reuse */
	else if (pagecount >= 2)
		return 0;
	WARN(pagecount < 1, "Pagecount should never be < 1");
	return -1;
}
static struct sk_buff *
gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
		      struct gve_rx_slot_page_info *page_info, u16 len,
		      struct napi_struct *napi,
		      union gve_rx_data_slot *data_slot)
{
	struct sk_buff *skb;

	skb = gve_rx_add_frags(napi, page_info, len);
	if (!skb)
		return NULL;

	/* Optimistically stop the kernel from freeing the page by increasing
	 * the page bias. We will check the refcount in refill to determine if
	 * we need to alloc a new page.
	 */
	get_page(page_info->page);

	return skb;
}
static struct sk_buff *
gve_rx_qpl(struct device *dev, struct net_device *netdev,
	   struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
	   u16 len, struct napi_struct *napi,
	   union gve_rx_data_slot *data_slot)
{
	struct sk_buff *skb;

	/* if raw_addressing mode is not enabled gvnic can only receive into
	 * registered segments. If the buffer can't be recycled, our only
	 * choice is to copy the data out of it so that we can return it to the
	 * device.
	 */
	if (page_info->can_flip) {
		skb = gve_rx_add_frags(napi, page_info, len);
		/* No point in recycling if we didn't get the skb */
		if (skb) {
			/* Make sure that the page isn't freed. */
			get_page(page_info->page);
			gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
		}
	} else {
		skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD);
		if (skb) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_copied_pkt++;
			u64_stats_update_end(&rx->statss);
		}
	}
	return skb;
}
static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
		   netdev_features_t feat, u32 idx)
{
	struct gve_rx_slot_page_info *page_info;
	struct gve_priv *priv = rx->gve;
	struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
	struct net_device *dev = priv->dev;
	union gve_rx_data_slot *data_slot;
	struct sk_buff *skb = NULL;
	dma_addr_t page_bus;
	u16 len;

	/* drop this packet */
	if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_desc_err_dropped_pkt++;
		u64_stats_update_end(&rx->statss);
		return false;
	}

	len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
	page_info = &rx->data.page_info[idx];

	data_slot = &rx->data.data_ring[idx];
	page_bus = (rx->data.raw_addressing) ?
		   be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK :
		   rx->data.qpl->page_buses[idx];
	dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
				PAGE_SIZE, DMA_FROM_DEVICE);

	if (len <= priv->rx_copybreak) {
		/* Just copy small packets */
		skb = gve_rx_copy(dev, napi, page_info, len, GVE_RX_PAD);
		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);
	} else {
		u8 can_flip = gve_rx_can_flip_buffers(dev);
		int recycle = 0;

		if (can_flip) {
			recycle = gve_rx_can_recycle_buffer(page_info->page);
			if (recycle < 0) {
				if (!rx->data.raw_addressing)
					gve_schedule_reset(priv);
				return false;
			}
		}

		page_info->can_flip = can_flip && recycle;
		if (rx->data.raw_addressing) {
			skb = gve_rx_raw_addressing(&priv->pdev->dev, dev,
						    page_info, len, napi,
						    data_slot);
		} else {
			skb = gve_rx_qpl(&priv->pdev->dev, dev, rx,
					 page_info, len, napi, data_slot);
		}
	}

	if (!skb) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_skb_alloc_fail++;
		u64_stats_update_end(&rx->statss);
		return false;
	}

	if (likely(feat & NETIF_F_RXCSUM)) {
		/* NIC passes up the partial sum */
		if (rx_desc->csum)
			skb->ip_summed = CHECKSUM_COMPLETE;
		else
			skb->ip_summed = CHECKSUM_NONE;
		skb->csum = csum_unfold(rx_desc->csum);
	}

	/* parse flags & pass relevant info up */
	if (likely(feat & NETIF_F_RXHASH) &&
	    gve_needs_rss(rx_desc->flags_seq))
		skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
			     gve_rss_type(rx_desc->flags_seq));

	if (skb_is_nonlinear(skb))
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, skb);
	return true;
}
static bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	u32 next_idx;

	next_idx = rx->cnt & rx->mask;
	desc = rx->desc.desc_ring + next_idx;

	flags_seq = desc->flags_seq;
	/* Make sure we have synchronized the seq no with the device */
	smp_rmb();

	return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}
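
/* Top up the data ring until fill_cnt is a full ring ahead of cnt. A slot is
 * refilled by flipping to the other half of its page when possible, reusing
 * the whole page when the stack has released it, or otherwise freeing the
 * old buffer and allocating a fresh page.
 */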
static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	int refill_target = rx->mask + 1;
	u32 fill_cnt = rx->fill_cnt;

	while (fill_cnt - rx->cnt < refill_target) {
		struct gve_rx_slot_page_info *page_info;
		u32 idx = fill_cnt & rx->mask;

		page_info = &rx->data.page_info[idx];
		if (page_info->can_flip) {
			/* The other half of the page is free because it was
			 * free when we processed the descriptor. Flip to it.
			 */
			union gve_rx_data_slot *data_slot =
						&rx->data.data_ring[idx];

			gve_rx_flip_buff(page_info, &data_slot->addr);
			page_info->can_flip = 0;
		} else {
			/* It is possible that the networking stack has already
			 * finished processing all outstanding packets in the buffer
			 * and it can be reused.
			 * Flipping is unnecessary here - if the networking stack still
			 * owns half the page it is impossible to tell which half. Either
			 * the whole page is free or it needs to be replaced.
			 */
			int recycle = gve_rx_can_recycle_buffer(page_info->page);

			if (recycle < 0) {
				if (!rx->data.raw_addressing)
					gve_schedule_reset(priv);
				return false;
			}
			if (!recycle) {
				/* We can't reuse the buffer - alloc a new one*/
				union gve_rx_data_slot *data_slot =
						&rx->data.data_ring[idx];
				struct device *dev = &priv->pdev->dev;

				gve_rx_free_buffer(dev, page_info, data_slot);
				page_info->page = NULL;
				if (gve_rx_alloc_buffer(priv, dev, page_info,
							data_slot)) {
					u64_stats_update_begin(&rx->statss);
					rx->rx_buf_alloc_fail++;
					u64_stats_update_end(&rx->statss);
					break;
				}
			}
		}
		fill_cnt++;
	}
	rx->fill_cnt = fill_cnt;
	return true;
}
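
/* Main Rx cleanup loop: consume descriptors whose sequence number matches the
 * ring's expected seqno, up to the NAPI budget, then update stats, restock
 * buffers and ring the doorbell. Returns whether the ring should be polled
 * again.
 */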
bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
		       netdev_features_t feat)
{
	struct gve_priv *priv = rx->gve;
	u32 work_done = 0, packets = 0;
	struct gve_rx_desc *desc;
	u32 cnt = rx->cnt;
	u32 idx = cnt & rx->mask;
	u64 bytes = 0;

	desc = rx->desc.desc_ring + idx;
	while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
	       work_done < budget) {
		bool dropped;

		netif_info(priv, rx_status, priv->dev,
			   "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
			   rx->q_num, idx, desc, desc->flags_seq);
		netif_info(priv, rx_status, priv->dev,
			   "[%d] seqno=%d rx->desc.seqno=%d\n",
			   rx->q_num, GVE_SEQNO(desc->flags_seq),
			   rx->desc.seqno);
		dropped = !gve_rx(rx, desc, feat, idx);
		if (!dropped) {
			bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
			packets++;
		}
		cnt++;
		idx = cnt & rx->mask;
		desc = rx->desc.desc_ring + idx;
		rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
		work_done++;
	}

	if (!work_done && rx->fill_cnt - cnt > rx->db_threshold)
		return false;

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += packets;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);
	rx->cnt = cnt;

	/* restock ring slots */
	if (!rx->data.raw_addressing) {
		/* In QPL mode buffs are refilled as the desc are processed */
		rx->fill_cnt += work_done;
	} else if (rx->fill_cnt - cnt <= rx->db_threshold) {
		/* In raw addressing mode buffs are only refilled if the avail
		 * falls below a threshold.
		 */
		if (!gve_rx_refill_buffers(priv, rx))
			return false;

		/* If we were not able to completely refill buffers, we'll want
		 * to schedule this queue for work again to refill buffers.
		 */
		if (rx->fill_cnt - cnt <= rx->db_threshold) {
			gve_rx_write_doorbell(priv, rx);
			return true;
		}
	}

	gve_rx_write_doorbell(priv, rx);
	return gve_rx_work_pending(rx);
}
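
/* NAPI poll entry point for this notify block's Rx ring; a budget of 0 means
 * process all available work.
 */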
bool gve_rx_poll(struct gve_notify_block *block, int budget)
{
	struct gve_rx_ring *rx = block->rx;
	netdev_features_t feat;
	bool repoll = false;

	feat = block->napi.dev->features;

	/* If budget is 0, do all the work */
	if (budget == 0)
		budget = INT_MAX;

	if (budget > 0)
		repoll |= gve_clean_rx_done(rx, budget, feat);
	else
		repoll |= gve_rx_work_pending(rx);
	return repoll;
}