[mirror_ubuntu-jammy-kernel.git] drivers/net/ethernet/google/gve/gve_rx.c
gve: Fix GFP flags when allocing pages
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/etherdevice.h>

static void gve_rx_free_buffer(struct device *dev,
			       struct gve_rx_slot_page_info *page_info,
			       union gve_rx_data_slot *data_slot)
{
	dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
				      GVE_DATA_SLOT_ADDR_PAGE_MASK);

	gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}

static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	if (rx->data.raw_addressing) {
		u32 slots = rx->mask + 1;
		int i;

		for (i = 0; i < slots; i++)
			gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
					   &rx->data.data_ring[i]);
	} else {
		gve_unassign_qpl(priv, rx->data.qpl->id);
		rx->data.qpl = NULL;
	}
	kvfree(rx->data.page_info);
	rx->data.page_info = NULL;
}

static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *dev = &priv->pdev->dev;
	u32 slots = rx->mask + 1;
	size_t bytes;

	gve_rx_remove_from_block(priv, idx);

	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
	rx->desc.desc_ring = NULL;

	dma_free_coherent(dev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;

	gve_rx_unfill_pages(priv, rx);

	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(dev, bytes, rx->data.data_ring,
			  rx->data.data_bus);
	rx->data.data_ring = NULL;
	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
				dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
	page_info->page = page;
	page_info->page_offset = 0;
	page_info->page_address = page_address(page);
	*slot_addr = cpu_to_be64(addr);
}

static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
			       struct gve_rx_slot_page_info *page_info,
			       union gve_rx_data_slot *data_slot)
{
	struct page *page;
	dma_addr_t dma;
	int err;

	err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
			     GFP_ATOMIC);
	if (err)
		return err;

	gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
	return 0;
}

static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->gve;
	u32 slots;
	int err;
	int i;

	/* Allocate one page per Rx queue slot. Each page is split into two
	 * packet buffers, when possible we "page flip" between the two.
	 */
	slots = rx->mask + 1;

	rx->data.page_info = kvzalloc(slots *
				      sizeof(*rx->data.page_info), GFP_KERNEL);
	if (!rx->data.page_info)
		return -ENOMEM;

	if (!rx->data.raw_addressing) {
		rx->data.qpl = gve_assign_rx_qpl(priv);
		if (!rx->data.qpl) {
			kvfree(rx->data.page_info);
			rx->data.page_info = NULL;
			return -ENOMEM;
		}
	}
	for (i = 0; i < slots; i++) {
		if (!rx->data.raw_addressing) {
			struct page *page = rx->data.qpl->pages[i];
			dma_addr_t addr = i * PAGE_SIZE;

			gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
					    &rx->data.data_ring[i].qpl_offset);
			continue;
		}
		err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
					  &rx->data.data_ring[i]);
		if (err)
			goto alloc_err;
	}

	return slots;
alloc_err:
	while (i--)
		gve_rx_free_buffer(&priv->pdev->dev,
				   &rx->data.page_info[i],
				   &rx->data.data_ring[i]);
	return err;
}
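
/*
 * Illustrative sketch (not part of the driver): in QPL mode the device
 * addresses slot i by its byte offset into the registered page list, which
 * is why gve_prefill_rx_pages() programs addr = i * PAGE_SIZE above.  The
 * hypothetical helper below only spells out that mapping for a given slot.
 */
static inline dma_addr_t gve_rx_qpl_slot_addr_sketch(u32 slot)
{
	return (dma_addr_t)slot * PAGE_SIZE;	/* offset of page "slot" in the QPL */
}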

static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots, npages;
	int filled_pages;
	size_t bytes;
	int err;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
	/* Make sure everything is zeroed to start with */
	memset(rx, 0, sizeof(*rx));

	rx->gve = priv;
	rx->q_num = idx;

	slots = priv->rx_data_slot_cnt;
	rx->mask = slots - 1;
	rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;

	/* alloc rx data ring */
	bytes = sizeof(*rx->data.data_ring) * slots;
	rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
						&rx->data.data_bus,
						GFP_KERNEL);
	if (!rx->data.data_ring)
		return -ENOMEM;
	filled_pages = gve_prefill_rx_pages(rx);
	if (filled_pages < 0) {
		err = -ENOMEM;
		goto abort_with_slots;
	}
	rx->fill_cnt = filled_pages;
	/* Ensure data ring slots (packet buffers) are visible. */
	dma_wmb();

	/* Alloc gve_queue_resources */
	rx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*rx->q_resources),
				   &rx->q_resources_bus,
				   GFP_KERNEL);
	if (!rx->q_resources) {
		err = -ENOMEM;
		goto abort_filled;
	}
	netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
		  (unsigned long)rx->data.data_bus);

	/* alloc rx desc ring */
	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	npages = bytes / PAGE_SIZE;
	if (npages * PAGE_SIZE != bytes) {
		err = -EIO;
		goto abort_with_q_resources;
	}

	rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
						GFP_KERNEL);
	if (!rx->desc.desc_ring) {
		err = -ENOMEM;
		goto abort_with_q_resources;
	}
	rx->cnt = 0;
	rx->db_threshold = priv->rx_desc_cnt / 2;
	rx->desc.seqno = 1;
	gve_rx_add_to_block(priv, idx);

	return 0;

abort_with_q_resources:
	dma_free_coherent(hdev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;
abort_filled:
	gve_rx_unfill_pages(priv, rx);
abort_with_slots:
	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
	rx->data.data_ring = NULL;

	return err;
}

int gve_rx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Unallocate if there was an error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_rx_free_ring(priv, j);
	}
	return err;
}

void gve_rx_free_rings_gqi(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring(priv, i);
}

void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

	iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
	if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
		return PKT_HASH_TYPE_L4;
	if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
		return PKT_HASH_TYPE_L3;
	return PKT_HASH_TYPE_L2;
}

static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
					struct gve_rx_slot_page_info *page_info,
					u16 len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (unlikely(!skb))
		return NULL;

	skb_add_rx_frag(skb, 0, page_info->page,
			page_info->page_offset +
			GVE_RX_PAD, len, PAGE_SIZE / 2);

	return skb;
}

static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
	const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);

	/* "flip" to other packet buffer on this page */
	page_info->page_offset ^= PAGE_SIZE / 2;
	*(slot_addr) ^= offset;
}
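
/*
 * Illustrative sketch (not part of the driver): because the two packet
 * buffers sit at offsets 0 and PAGE_SIZE / 2 within one page, XOR-ing an
 * offset with PAGE_SIZE / 2 toggles between them, e.g. 0 -> 2048 -> 0 on
 * 4096-byte pages.  The hypothetical helper below is only a worked example
 * of that toggle on a plain integer offset.
 */
static inline u32 gve_rx_other_half_sketch(u32 page_offset)
{
	return page_offset ^ (PAGE_SIZE / 2);	/* first half <-> second half */
}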

static bool gve_rx_can_flip_buffers(struct net_device *netdev)
{
	return PAGE_SIZE == 4096
		? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false;
}
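
/*
 * Worked example (illustrative only, assuming GVE_RX_PAD is 2 bytes and the
 * standard 14-byte Ethernet header): with the default MTU of 1500 a frame
 * needs at most 1500 + 2 + 14 = 1516 bytes, which fits in half of a
 * 4096-byte page (2048 bytes), so buffer flipping is allowed.  An MTU too
 * large to fit in half a page disables flipping, and the driver instead
 * copies (QPL mode) or replaces the page (raw addressing) when the buffer
 * is still in use.
 */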

static int gve_rx_can_recycle_buffer(struct page *page)
{
	int pagecount = page_count(page);

	/* This page is not being used by any SKBs - reuse */
	if (pagecount == 1)
		return 1;
	/* This page is still being used by an SKB - we can't reuse */
	else if (pagecount >= 2)
		return 0;
	WARN(pagecount < 1, "Pagecount should never be < 1");
	return -1;
}

static struct sk_buff *
gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
		      struct gve_rx_slot_page_info *page_info, u16 len,
		      struct napi_struct *napi,
		      union gve_rx_data_slot *data_slot)
{
	struct sk_buff *skb;

	skb = gve_rx_add_frags(napi, page_info, len);
	if (!skb)
		return NULL;

	/* Optimistically stop the kernel from freeing the page by increasing
	 * the page bias. We will check the refcount in refill to determine if
	 * we need to alloc a new page.
	 */
	get_page(page_info->page);

	return skb;
}
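
/*
 * Illustrative sketch (not part of the driver): the extra reference taken by
 * get_page() above keeps the page alive while the skb frag still points at
 * it.  Once the stack consumes the skb and drops its reference, the count
 * falls back to 1 and gve_rx_can_recycle_buffer() reports the page as
 * reusable.  The hypothetical helper below just restates that check against
 * the assumed "driver holds exactly one reference" baseline.
 */
static inline bool gve_rx_page_is_idle_sketch(struct page *page)
{
	return page_count(page) == 1;	/* only the driver's reference remains */
}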

static struct sk_buff *
gve_rx_qpl(struct device *dev, struct net_device *netdev,
	   struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
	   u16 len, struct napi_struct *napi,
	   union gve_rx_data_slot *data_slot)
{
	struct sk_buff *skb;

	/* if raw_addressing mode is not enabled gvnic can only receive into
	 * registered segments. If the buffer can't be recycled, our only
	 * choice is to copy the data out of it so that we can return it to the
	 * device.
	 */
	if (page_info->can_flip) {
		skb = gve_rx_add_frags(napi, page_info, len);
		/* No point in recycling if we didn't get the skb */
		if (skb) {
			/* Make sure that the page isn't freed. */
			get_page(page_info->page);
			gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
		}
	} else {
		skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD);
		if (skb) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_copied_pkt++;
			u64_stats_update_end(&rx->statss);
		}
	}
	return skb;
}

static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
		   netdev_features_t feat, u32 idx)
{
	struct gve_rx_slot_page_info *page_info;
	struct gve_priv *priv = rx->gve;
	struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
	struct net_device *dev = priv->dev;
	union gve_rx_data_slot *data_slot;
	struct sk_buff *skb = NULL;
	dma_addr_t page_bus;
	u16 len;

	/* drop this packet */
	if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_desc_err_dropped_pkt++;
		u64_stats_update_end(&rx->statss);
		return false;
	}

	len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
	page_info = &rx->data.page_info[idx];

	data_slot = &rx->data.data_ring[idx];
	page_bus = (rx->data.raw_addressing) ?
		   be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK :
		   rx->data.qpl->page_buses[idx];
	dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
				PAGE_SIZE, DMA_FROM_DEVICE);

	if (len <= priv->rx_copybreak) {
		/* Just copy small packets */
		skb = gve_rx_copy(dev, napi, page_info, len, GVE_RX_PAD);
		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);
	} else {
		u8 can_flip = gve_rx_can_flip_buffers(dev);
		int recycle = 0;

		if (can_flip) {
			recycle = gve_rx_can_recycle_buffer(page_info->page);
			if (recycle < 0) {
				if (!rx->data.raw_addressing)
					gve_schedule_reset(priv);
				return false;
			}
		}

		page_info->can_flip = can_flip && recycle;
		if (rx->data.raw_addressing) {
			skb = gve_rx_raw_addressing(&priv->pdev->dev, dev,
						    page_info, len, napi,
						    data_slot);
		} else {
			skb = gve_rx_qpl(&priv->pdev->dev, dev, rx,
					 page_info, len, napi, data_slot);
		}
	}

	if (!skb) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_skb_alloc_fail++;
		u64_stats_update_end(&rx->statss);
		return false;
	}

	if (likely(feat & NETIF_F_RXCSUM)) {
		/* NIC passes up the partial sum */
		if (rx_desc->csum)
			skb->ip_summed = CHECKSUM_COMPLETE;
		else
			skb->ip_summed = CHECKSUM_NONE;
		skb->csum = csum_unfold(rx_desc->csum);
	}

	/* parse flags & pass relevant info up */
	if (likely(feat & NETIF_F_RXHASH) &&
	    gve_needs_rss(rx_desc->flags_seq))
		skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
			     gve_rss_type(rx_desc->flags_seq));

	if (skb_is_nonlinear(skb))
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, skb);
	return true;
}

static bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	u32 next_idx;

	next_idx = rx->cnt & rx->mask;
	desc = rx->desc.desc_ring + next_idx;

	flags_seq = desc->flags_seq;
	/* Make sure we have synchronized the seq no with the device */
	smp_rmb();

	return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}
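
/*
 * Illustrative sketch (not part of the driver): the device stamps every
 * descriptor it writes with a small sequence number carried in flags_seq,
 * and the ring tracks the value it expects next in rx->desc.seqno; a slot
 * is only treated as complete when the two match.  Assuming the counter
 * cycles through 1..7 and never takes the value 0 (a reading of
 * gve_next_seqno() in gve.h, stated here as an assumption), a software
 * model of the advance step could look like:
 */
static inline u8 gve_seqno_next_sketch(u8 seqno)
{
	return (seqno % 7) + 1;	/* 1 -> 2 -> ... -> 7 -> 1, never 0 */
}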

static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	int refill_target = rx->mask + 1;
	u32 fill_cnt = rx->fill_cnt;

	while (fill_cnt - rx->cnt < refill_target) {
		struct gve_rx_slot_page_info *page_info;
		u32 idx = fill_cnt & rx->mask;

		page_info = &rx->data.page_info[idx];
		if (page_info->can_flip) {
			/* The other half of the page is free because it was
			 * free when we processed the descriptor. Flip to it.
			 */
			union gve_rx_data_slot *data_slot =
						&rx->data.data_ring[idx];

			gve_rx_flip_buff(page_info, &data_slot->addr);
			page_info->can_flip = 0;
		} else {
			/* It is possible that the networking stack has already
			 * finished processing all outstanding packets in the buffer
			 * and it can be reused.
			 * Flipping is unnecessary here - if the networking stack still
			 * owns half the page it is impossible to tell which half. Either
			 * the whole page is free or it needs to be replaced.
			 */
			int recycle = gve_rx_can_recycle_buffer(page_info->page);

			if (recycle < 0) {
				if (!rx->data.raw_addressing)
					gve_schedule_reset(priv);
				return false;
			}
			if (!recycle) {
				/* We can't reuse the buffer - alloc a new one */
				union gve_rx_data_slot *data_slot =
						&rx->data.data_ring[idx];
				struct device *dev = &priv->pdev->dev;

				gve_rx_free_buffer(dev, page_info, data_slot);
				page_info->page = NULL;
				if (gve_rx_alloc_buffer(priv, dev, page_info,
							data_slot)) {
					u64_stats_update_begin(&rx->statss);
					rx->rx_buf_alloc_fail++;
					u64_stats_update_end(&rx->statss);
					break;
				}
			}
		}
		fill_cnt++;
	}
	rx->fill_cnt = fill_cnt;
	return true;
}
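
/*
 * Illustrative sketch (not part of the driver): fill_cnt and cnt are
 * free-running u32 counters rather than ring indices, so the number of
 * buffers currently posted to the device is simply fill_cnt - cnt and a
 * counter's slot index is counter & rx->mask; unsigned subtraction keeps
 * the distance correct even after the counters wrap.  The hypothetical
 * helper below just names that distance.
 */
static inline u32 gve_rx_bufs_posted_sketch(u32 fill_cnt, u32 cnt)
{
	return fill_cnt - cnt;	/* valid across u32 wraparound */
}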

bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
		       netdev_features_t feat)
{
	struct gve_priv *priv = rx->gve;
	u32 work_done = 0, packets = 0;
	struct gve_rx_desc *desc;
	u32 cnt = rx->cnt;
	u32 idx = cnt & rx->mask;
	u64 bytes = 0;

	desc = rx->desc.desc_ring + idx;
	while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
	       work_done < budget) {
		bool dropped;

		netif_info(priv, rx_status, priv->dev,
			   "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
			   rx->q_num, idx, desc, desc->flags_seq);
		netif_info(priv, rx_status, priv->dev,
			   "[%d] seqno=%d rx->desc.seqno=%d\n",
			   rx->q_num, GVE_SEQNO(desc->flags_seq),
			   rx->desc.seqno);
		dropped = !gve_rx(rx, desc, feat, idx);
		if (!dropped) {
			bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
			packets++;
		}
		cnt++;
		idx = cnt & rx->mask;
		desc = rx->desc.desc_ring + idx;
		rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
		work_done++;
	}

	if (!work_done && rx->fill_cnt - cnt > rx->db_threshold)
		return false;

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += packets;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);
	rx->cnt = cnt;

	/* restock ring slots */
	if (!rx->data.raw_addressing) {
		/* In QPL mode buffs are refilled as the desc are processed */
		rx->fill_cnt += work_done;
	} else if (rx->fill_cnt - cnt <= rx->db_threshold) {
		/* In raw addressing mode buffs are only refilled if the avail
		 * falls below a threshold.
		 */
		if (!gve_rx_refill_buffers(priv, rx))
			return false;

		/* If we were not able to completely refill buffers, we'll want
		 * to schedule this queue for work again to refill buffers.
		 */
		if (rx->fill_cnt - cnt <= rx->db_threshold) {
			gve_rx_write_doorbell(priv, rx);
			return true;
		}
	}

	gve_rx_write_doorbell(priv, rx);
	return gve_rx_work_pending(rx);
}
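
/*
 * Worked example (illustrative only): gve_rx_alloc_ring() sets db_threshold
 * to priv->rx_desc_cnt / 2, so with a hypothetical 1024-entry descriptor
 * ring the threshold is 512.  In raw addressing mode the refill path above
 * therefore runs only once the number of posted buffers (fill_cnt - cnt)
 * has dropped to 512 or fewer, and the queue is rescheduled if the refill
 * cannot bring it back above that level.
 */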

bool gve_rx_poll(struct gve_notify_block *block, int budget)
{
	struct gve_rx_ring *rx = block->rx;
	netdev_features_t feat;
	bool repoll = false;

	feat = block->napi.dev->features;

	/* If budget is 0, do all the work */
	if (budget == 0)
		budget = INT_MAX;

	if (budget > 0)
		repoll |= gve_clean_rx_done(rx, budget, feat);
	else
		repoll |= gve_rx_work_pending(rx);
	return repoll;
}