[mirror_ubuntu-bionic-kernel.git] drivers/net/virtio_net.c - virtio-net: fail XDP set if guest csum is negotiated
1 /* A network driver using virtio.
2 *
3 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, see <http://www.gnu.org/licenses/>.
17 */
18 //#define DEBUG
19 #include <linux/netdevice.h>
20 #include <linux/etherdevice.h>
21 #include <linux/ethtool.h>
22 #include <linux/module.h>
23 #include <linux/virtio.h>
24 #include <linux/virtio_net.h>
25 #include <linux/bpf.h>
26 #include <linux/bpf_trace.h>
27 #include <linux/scatterlist.h>
28 #include <linux/if_vlan.h>
29 #include <linux/slab.h>
30 #include <linux/cpu.h>
31 #include <linux/average.h>
32 #include <linux/filter.h>
33 #include <net/route.h>
34
35 static int napi_weight = NAPI_POLL_WEIGHT;
36 module_param(napi_weight, int, 0444);
37
38 static bool csum = true, gso = true, napi_tx;
39 module_param(csum, bool, 0444);
40 module_param(gso, bool, 0444);
41 module_param(napi_tx, bool, 0644);
42
43 /* FIXME: MTU in config. */
44 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
45 #define GOOD_COPY_LEN 128
46
47 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
48
49 /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
50 #define VIRTIO_XDP_HEADROOM 256
51
52 /* RX packet size EWMA. The average packet size is used to determine the packet
53 * buffer size when refilling RX rings. As the entire RX ring may be refilled
54 * at once, the weight is chosen so that the EWMA will be insensitive to short-
55 * term, transient changes in packet size.
56 */
57 DECLARE_EWMA(pkt_len, 0, 64)
58
59 #define VIRTNET_DRIVER_VERSION "1.0.0"
60
61 static const unsigned long guest_offloads[] = {
62 VIRTIO_NET_F_GUEST_TSO4,
63 VIRTIO_NET_F_GUEST_TSO6,
64 VIRTIO_NET_F_GUEST_ECN,
65 VIRTIO_NET_F_GUEST_UFO,
66 VIRTIO_NET_F_GUEST_CSUM
67 };
68
69 struct virtnet_stats {
70 struct u64_stats_sync tx_syncp;
71 struct u64_stats_sync rx_syncp;
72 u64 tx_bytes;
73 u64 tx_packets;
74
75 u64 rx_bytes;
76 u64 rx_packets;
77 };
78
79 /* Internal representation of a send virtqueue */
80 struct send_queue {
81 /* Virtqueue associated with this send_queue */
82 struct virtqueue *vq;
83
84 /* TX: fragments + linear part + virtio header */
85 struct scatterlist sg[MAX_SKB_FRAGS + 2];
86
87 /* Name of the send queue: output.$index */
88 char name[40];
89
90 struct napi_struct napi;
91 };
92
93 /* Internal representation of a receive virtqueue */
94 struct receive_queue {
95 /* Virtqueue associated with this receive_queue */
96 struct virtqueue *vq;
97
98 struct napi_struct napi;
99
100 struct bpf_prog __rcu *xdp_prog;
101
102 /* Chain pages by the private ptr. */
103 struct page *pages;
104
105 /* Average packet length for mergeable receive buffers. */
106 struct ewma_pkt_len mrg_avg_pkt_len;
107
108 /* Page frag for packet buffer allocation. */
109 struct page_frag alloc_frag;
110
111 /* RX: fragments + linear part + virtio header */
112 struct scatterlist sg[MAX_SKB_FRAGS + 2];
113
114 /* Min single buffer size for mergeable buffers case. */
115 unsigned int min_buf_len;
116
117 /* Name of this receive queue: input.$index */
118 char name[40];
119 };
120
121 /* Control VQ buffers: protected by the rtnl lock */
122 struct control_buf {
123 struct virtio_net_ctrl_hdr hdr;
124 virtio_net_ctrl_ack status;
125 struct virtio_net_ctrl_mq mq;
126 u8 promisc;
127 u8 allmulti;
128 __virtio16 vid;
129 u64 offloads;
130 };
131
132 struct virtnet_info {
133 struct virtio_device *vdev;
134 struct virtqueue *cvq;
135 struct net_device *dev;
136 struct send_queue *sq;
137 struct receive_queue *rq;
138 unsigned int status;
139
140 /* Max # of queue pairs supported by the device */
141 u16 max_queue_pairs;
142
143 /* # of queue pairs currently used by the driver */
144 u16 curr_queue_pairs;
145
146 /* # of XDP queue pairs currently used by the driver */
147 u16 xdp_queue_pairs;
148
149 /* I like... big packets and I cannot lie! */
150 bool big_packets;
151
152 /* Host will merge rx buffers for big packets (shake it! shake it!) */
153 bool mergeable_rx_bufs;
154
155 /* Has control virtqueue */
156 bool has_cvq;
157
158 /* Host can handle any s/g split between our header and packet data */
159 bool any_header_sg;
160
161 /* Packet virtio header size */
162 u8 hdr_len;
163
164 /* Active statistics */
165 struct virtnet_stats __percpu *stats;
166
167 /* Work struct for refilling if we run low on memory. */
168 struct delayed_work refill;
169
170 /* Work struct for config space updates */
171 struct work_struct config_work;
172
173 /* Is the affinity hint set for virtqueues? */
174 bool affinity_hint_set;
175
176 /* CPU hotplug instances for online & dead */
177 struct hlist_node node;
178 struct hlist_node node_dead;
179
180 struct control_buf *ctrl;
181
182 /* Ethtool settings */
183 u8 duplex;
184 u32 speed;
185
186 unsigned long guest_offloads;
187 };
188
189 struct padded_vnet_hdr {
190 struct virtio_net_hdr_mrg_rxbuf hdr;
191 /*
192 * hdr is in a separate sg buffer, and data sg buffer shares same page
193 * with this header sg. This padding makes next sg 16 byte aligned
194 * after the header.
195 */
196 char padding[4];
197 };
198
199 /* Converting between virtqueue no. and kernel tx/rx queue no.
200 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
201 */
202 static int vq2txq(struct virtqueue *vq)
203 {
204 return (vq->index - 1) / 2;
205 }
206
207 static int txq2vq(int txq)
208 {
209 return txq * 2 + 1;
210 }
211
212 static int vq2rxq(struct virtqueue *vq)
213 {
214 return vq->index / 2;
215 }
216
217 static int rxq2vq(int rxq)
218 {
219 return rxq * 2;
220 }
221
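/* The virtio-net header is carried in skb->cb on both the rx and tx
 * paths; skb->cb is large enough to hold struct virtio_net_hdr_mrg_rxbuf.
 */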
222 static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)
223 {
224 return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;
225 }
226
227 /*
228 * private is used to chain pages for big packets; put the whole
229 * most recently used list at the beginning for reuse
230 */
231 static void give_pages(struct receive_queue *rq, struct page *page)
232 {
233 struct page *end;
234
235 /* Find end of list, sew whole thing into vi->rq.pages. */
236 for (end = page; end->private; end = (struct page *)end->private);
237 end->private = (unsigned long)rq->pages;
238 rq->pages = page;
239 }
240
241 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
242 {
243 struct page *p = rq->pages;
244
245 if (p) {
246 rq->pages = (struct page *)p->private;
247 /* clear private here, it is used to chain pages */
248 p->private = 0;
249 } else
250 p = alloc_page(gfp_mask);
251 return p;
252 }
253
254 static void virtqueue_napi_schedule(struct napi_struct *napi,
255 struct virtqueue *vq)
256 {
257 if (napi_schedule_prep(napi)) {
258 virtqueue_disable_cb(vq);
259 __napi_schedule(napi);
260 }
261 }
262
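/* Complete napi and re-enable virtqueue callbacks, but reschedule if
 * more buffers arrived while callbacks were still disabled so that
 * nothing is left unprocessed.
 */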
263 static void virtqueue_napi_complete(struct napi_struct *napi,
264 struct virtqueue *vq, int processed)
265 {
266 int opaque;
267
268 opaque = virtqueue_enable_cb_prepare(vq);
269 if (napi_complete_done(napi, processed)) {
270 if (unlikely(virtqueue_poll(vq, opaque)))
271 virtqueue_napi_schedule(napi, vq);
272 } else {
273 virtqueue_disable_cb(vq);
274 }
275 }
276
277 static void skb_xmit_done(struct virtqueue *vq)
278 {
279 struct virtnet_info *vi = vq->vdev->priv;
280 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;
281
282 /* Suppress further interrupts. */
283 virtqueue_disable_cb(vq);
284
285 if (napi->weight)
286 virtqueue_napi_schedule(napi, vq);
287 else
288 /* We were probably waiting for more output buffers. */
289 netif_wake_subqueue(vi->dev, vq2txq(vq));
290 }
291
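/* For mergeable buffers the per-buffer context is not a pointer but two
 * values packed into an unsigned long: the buffer truesize in the low
 * MRG_CTX_HEADER_SHIFT bits and the headroom in the bits above them,
 * e.g. a 1536 byte truesize with 256 bytes of headroom is encoded as
 * (256 << 22) | 1536.
 */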
292 #define MRG_CTX_HEADER_SHIFT 22
293 static void *mergeable_len_to_ctx(unsigned int truesize,
294 unsigned int headroom)
295 {
296 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
297 }
298
299 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
300 {
301 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
302 }
303
304 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
305 {
306 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
307 }
308
309 /* Called from bottom half context */
310 static struct sk_buff *page_to_skb(struct virtnet_info *vi,
311 struct receive_queue *rq,
312 struct page *page, unsigned int offset,
313 unsigned int len, unsigned int truesize)
314 {
315 struct sk_buff *skb;
316 struct virtio_net_hdr_mrg_rxbuf *hdr;
317 unsigned int copy, hdr_len, hdr_padded_len;
318 char *p;
319
320 p = page_address(page) + offset;
321
322 /* copy small packet so we can reuse these pages for small data */
323 skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
324 if (unlikely(!skb))
325 return NULL;
326
327 hdr = skb_vnet_hdr(skb);
328
329 hdr_len = vi->hdr_len;
330 if (vi->mergeable_rx_bufs)
331 hdr_padded_len = sizeof(*hdr);
332 else
333 hdr_padded_len = sizeof(struct padded_vnet_hdr);
334
335 memcpy(hdr, p, hdr_len);
336
337 len -= hdr_len;
338 offset += hdr_padded_len;
339 p += hdr_padded_len;
340
341 copy = len;
342 if (copy > skb_tailroom(skb))
343 copy = skb_tailroom(skb);
344 skb_put_data(skb, p, copy);
345
346 len -= copy;
347 offset += copy;
348
349 if (vi->mergeable_rx_bufs) {
350 if (len)
351 skb_add_rx_frag(skb, 0, page, offset, len, truesize);
352 else
353 put_page(page);
354 return skb;
355 }
356
357 /*
358 * Verify that we can indeed put this data into a skb.
359 * This is here to handle cases when the device erroneously
360 * tries to receive more than is possible. This is usually
361 * the case of a broken device.
362 */
363 if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
364 net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
365 dev_kfree_skb(skb);
366 return NULL;
367 }
368 BUG_ON(offset >= PAGE_SIZE);
369 while (len) {
370 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
371 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
372 frag_size, truesize);
373 len -= frag_size;
374 page = (struct page *)page->private;
375 offset = 0;
376 }
377
378 if (page)
379 give_pages(rq, page);
380
381 return skb;
382 }
383
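/* XDP_TX transmissions bypass the normal stack queues: each CPU uses the
 * dedicated send queue at index
 * curr_queue_pairs - xdp_queue_pairs + smp_processor_id(), so no tx lock
 * is taken on this path.
 */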
384 static void virtnet_xdp_flush(struct net_device *dev)
385 {
386 struct virtnet_info *vi = netdev_priv(dev);
387 struct send_queue *sq;
388 unsigned int qp;
389
390 qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
391 sq = &vi->sq[qp];
392
393 virtqueue_kick(sq->vq);
394 }
395
396 static bool __virtnet_xdp_xmit(struct virtnet_info *vi,
397 struct xdp_buff *xdp)
398 {
399 struct virtio_net_hdr_mrg_rxbuf *hdr;
400 unsigned int len;
401 struct send_queue *sq;
402 unsigned int qp;
403 void *xdp_sent;
404 int err;
405
406 qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
407 sq = &vi->sq[qp];
408
409 /* Free up any pending old buffers before queueing new ones. */
410 while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
411 struct page *sent_page = virt_to_head_page(xdp_sent);
412
413 put_page(sent_page);
414 }
415
416 xdp->data -= vi->hdr_len;
417 /* Zero header and leave csum up to XDP layers */
418 hdr = xdp->data;
419 memset(hdr, 0, vi->hdr_len);
420
421 sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
422
423 err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC);
424 if (unlikely(err))
425 return false; /* Caller handles free/refcnt */
426
427 return true;
428 }
429
430 static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
431 {
432 struct virtnet_info *vi = netdev_priv(dev);
433 bool sent = __virtnet_xdp_xmit(vi, xdp);
434
435 if (!sent)
436 return -ENOSPC;
437 return 0;
438 }
439
440 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
441 {
442 return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
443 }
444
445 /* We copy the packet for XDP in the following cases:
446 *
447 * 1) Packet is scattered across multiple rx buffers.
448 * 2) Headroom space is insufficient.
449 *
450 * This is inefficient but it's a temporary condition that
451 * we hit right after XDP is enabled and until the queue is refilled
452 * with large buffers with sufficient headroom - so it should affect
453 * at most queue size packets.
454 * Afterwards, the conditions to enable XDP should preclude the
455 * underlying device from sending packets across multiple
456 * buffers (num_buf > 1), and we make sure buffers have
457 * enough headroom.
458 */
459 static struct page *xdp_linearize_page(struct receive_queue *rq,
460 u16 *num_buf,
461 struct page *p,
462 int offset,
463 int page_off,
464 unsigned int *len)
465 {
466 struct page *page = alloc_page(GFP_ATOMIC);
467
468 if (!page)
469 return NULL;
470
471 memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
472 page_off += *len;
473
474 while (--*num_buf) {
475 unsigned int buflen;
476 void *buf;
477 int off;
478
479 buf = virtqueue_get_buf(rq->vq, &buflen);
480 if (unlikely(!buf))
481 goto err_buf;
482
483 p = virt_to_head_page(buf);
484 off = buf - page_address(p);
485
486 /* guard against a misconfigured or uncooperative backend that
487 * is sending packets larger than the MTU.
488 */
489 if ((page_off + buflen) > PAGE_SIZE) {
490 put_page(p);
491 goto err_buf;
492 }
493
494 memcpy(page_address(page) + page_off,
495 page_address(p) + off, buflen);
496 page_off += buflen;
497 put_page(p);
498 }
499
500 /* Headroom does not contribute to packet length */
501 *len = page_off - VIRTIO_XDP_HEADROOM;
502 return page;
503 err_buf:
504 __free_pages(page, 0);
505 return NULL;
506 }
507
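/* Small receive buffers are laid out by add_recvbuf_small() as
 *   [ VIRTNET_RX_PAD ][ xdp headroom ][ vnet hdr ][ packet data ]
 * with room for struct skb_shared_info at the end, so the buffer can be
 * handed to build_skb() without copying.
 */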
508 static struct sk_buff *receive_small(struct net_device *dev,
509 struct virtnet_info *vi,
510 struct receive_queue *rq,
511 void *buf, void *ctx,
512 unsigned int len,
513 bool *xdp_xmit)
514 {
515 struct sk_buff *skb;
516 struct bpf_prog *xdp_prog;
517 unsigned int xdp_headroom = (unsigned long)ctx;
518 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
519 unsigned int headroom = vi->hdr_len + header_offset;
520 unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
521 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
522 struct page *page = virt_to_head_page(buf);
523 unsigned int delta = 0;
524 struct page *xdp_page;
525 bool sent;
526 int err;
527
528 len -= vi->hdr_len;
529
530 rcu_read_lock();
531 xdp_prog = rcu_dereference(rq->xdp_prog);
532 if (xdp_prog) {
533 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
534 struct xdp_buff xdp;
535 void *orig_data;
536 u32 act;
537
538 if (unlikely(hdr->hdr.gso_type))
539 goto err_xdp;
540
541 if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
542 int offset = buf - page_address(page) + header_offset;
543 unsigned int tlen = len + vi->hdr_len;
544 u16 num_buf = 1;
545
546 xdp_headroom = virtnet_get_headroom(vi);
547 header_offset = VIRTNET_RX_PAD + xdp_headroom;
548 headroom = vi->hdr_len + header_offset;
549 buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
550 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
551 xdp_page = xdp_linearize_page(rq, &num_buf, page,
552 offset, header_offset,
553 &tlen);
554 if (!xdp_page)
555 goto err_xdp;
556
557 buf = page_address(xdp_page);
558 put_page(page);
559 page = xdp_page;
560 }
561
562 xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
563 xdp.data = xdp.data_hard_start + xdp_headroom;
564 xdp_set_data_meta_invalid(&xdp);
565 xdp.data_end = xdp.data + len;
566 orig_data = xdp.data;
567 act = bpf_prog_run_xdp(xdp_prog, &xdp);
568
569 switch (act) {
570 case XDP_PASS:
571 /* Recalculate length in case bpf program changed it */
572 delta = orig_data - xdp.data;
573 break;
574 case XDP_TX:
575 sent = __virtnet_xdp_xmit(vi, &xdp);
576 if (unlikely(!sent)) {
577 trace_xdp_exception(vi->dev, xdp_prog, act);
578 goto err_xdp;
579 }
580 *xdp_xmit = true;
581 rcu_read_unlock();
582 goto xdp_xmit;
583 case XDP_REDIRECT:
584 err = xdp_do_redirect(dev, &xdp, xdp_prog);
585 if (err)
586 goto err_xdp;
587 *xdp_xmit = true;
588 rcu_read_unlock();
589 goto xdp_xmit;
590 default:
591 bpf_warn_invalid_xdp_action(act);
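/* fall through */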
592 case XDP_ABORTED:
593 trace_xdp_exception(vi->dev, xdp_prog, act);
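/* fall through */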
594 case XDP_DROP:
595 goto err_xdp;
596 }
597 }
598 rcu_read_unlock();
599
600 skb = build_skb(buf, buflen);
601 if (!skb) {
602 put_page(page);
603 goto err;
604 }
605 skb_reserve(skb, headroom - delta);
606 skb_put(skb, len + delta);
607 if (!delta) {
608 buf += header_offset;
609 memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
610 } /* keep zeroed vnet hdr since packet was changed by bpf */
611
612 err:
613 return skb;
614
615 err_xdp:
616 rcu_read_unlock();
617 dev->stats.rx_dropped++;
618 put_page(page);
619 xdp_xmit:
620 return NULL;
621 }
622
623 static struct sk_buff *receive_big(struct net_device *dev,
624 struct virtnet_info *vi,
625 struct receive_queue *rq,
626 void *buf,
627 unsigned int len)
628 {
629 struct page *page = buf;
630 struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
631
632 if (unlikely(!skb))
633 goto err;
634
635 return skb;
636
637 err:
638 dev->stats.rx_dropped++;
639 give_pages(rq, page);
640 return NULL;
641 }
642
643 static struct sk_buff *receive_mergeable(struct net_device *dev,
644 struct virtnet_info *vi,
645 struct receive_queue *rq,
646 void *buf,
647 void *ctx,
648 unsigned int len,
649 bool *xdp_xmit)
650 {
651 struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
652 u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
653 struct page *page = virt_to_head_page(buf);
654 int offset = buf - page_address(page);
655 struct sk_buff *head_skb, *curr_skb;
656 struct bpf_prog *xdp_prog;
657 unsigned int truesize;
658 unsigned int headroom = mergeable_ctx_to_headroom(ctx);
659 int err;
660 bool sent;
661
662 head_skb = NULL;
663
664 rcu_read_lock();
665 xdp_prog = rcu_dereference(rq->xdp_prog);
666 if (xdp_prog) {
667 struct page *xdp_page;
668 struct xdp_buff xdp;
669 void *data;
670 u32 act;
671
672 /* Transient failure which in theory could occur if
673 * in-flight packets from before XDP was enabled reach
674 * the receive path after XDP is loaded.
675 */
676 if (unlikely(hdr->hdr.gso_type))
677 goto err_xdp;
678
679 /* This happens when rx buffer size is underestimated */
680 if (unlikely(num_buf > 1 ||
681 headroom < virtnet_get_headroom(vi))) {
682 /* linearize data for XDP */
683 xdp_page = xdp_linearize_page(rq, &num_buf,
684 page, offset,
685 VIRTIO_XDP_HEADROOM,
686 &len);
687 if (!xdp_page)
688 goto err_xdp;
689 offset = VIRTIO_XDP_HEADROOM;
690 } else {
691 xdp_page = page;
692 }
693
694 /* Allow consuming headroom but reserve enough space to push
695 * the descriptor on if we get an XDP_TX return code.
696 */
697 data = page_address(xdp_page) + offset;
698 xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
699 xdp.data = data + vi->hdr_len;
700 xdp_set_data_meta_invalid(&xdp);
701 xdp.data_end = xdp.data + (len - vi->hdr_len);
702 act = bpf_prog_run_xdp(xdp_prog, &xdp);
703
704 if (act != XDP_PASS)
705 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
706
707 switch (act) {
708 case XDP_PASS:
709 /* recalculate offset to account for any header
710 * adjustments. Note other cases do not build an
711 * skb and avoid using offset
712 */
713 offset = xdp.data -
714 page_address(xdp_page) - vi->hdr_len;
715
716 /* We can only create skb based on xdp_page. */
717 if (unlikely(xdp_page != page)) {
718 rcu_read_unlock();
719 put_page(page);
720 head_skb = page_to_skb(vi, rq, xdp_page,
721 offset, len, PAGE_SIZE);
722 return head_skb;
723 }
724 break;
725 case XDP_TX:
726 sent = __virtnet_xdp_xmit(vi, &xdp);
727 if (unlikely(!sent)) {
728 trace_xdp_exception(vi->dev, xdp_prog, act);
729 if (unlikely(xdp_page != page))
730 put_page(xdp_page);
731 goto err_xdp;
732 }
733 *xdp_xmit = true;
734 if (unlikely(xdp_page != page))
735 put_page(page);
736 rcu_read_unlock();
737 goto xdp_xmit;
738 case XDP_REDIRECT:
739 err = xdp_do_redirect(dev, &xdp, xdp_prog);
740 if (!err)
741 *xdp_xmit = true;
742 rcu_read_unlock();
743 goto xdp_xmit;
744 default:
745 bpf_warn_invalid_xdp_action(act);
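/* fall through */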
746 case XDP_ABORTED:
747 trace_xdp_exception(vi->dev, xdp_prog, act);
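/* fall through */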
748 case XDP_DROP:
749 if (unlikely(xdp_page != page))
750 __free_pages(xdp_page, 0);
751 goto err_xdp;
752 }
753 }
754 rcu_read_unlock();
755
756 truesize = mergeable_ctx_to_truesize(ctx);
757 if (unlikely(len > truesize)) {
758 pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
759 dev->name, len, (unsigned long)ctx);
760 dev->stats.rx_length_errors++;
761 goto err_skb;
762 }
763
764 head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
765 curr_skb = head_skb;
766
767 if (unlikely(!curr_skb))
768 goto err_skb;
769 while (--num_buf) {
770 int num_skb_frags;
771
772 buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
773 if (unlikely(!buf)) {
774 pr_debug("%s: rx error: %d buffers out of %d missing\n",
775 dev->name, num_buf,
776 virtio16_to_cpu(vi->vdev,
777 hdr->num_buffers));
778 dev->stats.rx_length_errors++;
779 goto err_buf;
780 }
781
782 page = virt_to_head_page(buf);
783
784 truesize = mergeable_ctx_to_truesize(ctx);
785 if (unlikely(len > truesize)) {
786 pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
787 dev->name, len, (unsigned long)ctx);
788 dev->stats.rx_length_errors++;
789 goto err_skb;
790 }
791
792 num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
793 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
794 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
795
796 if (unlikely(!nskb))
797 goto err_skb;
798 if (curr_skb == head_skb)
799 skb_shinfo(curr_skb)->frag_list = nskb;
800 else
801 curr_skb->next = nskb;
802 curr_skb = nskb;
803 head_skb->truesize += nskb->truesize;
804 num_skb_frags = 0;
805 }
806 if (curr_skb != head_skb) {
807 head_skb->data_len += len;
808 head_skb->len += len;
809 head_skb->truesize += truesize;
810 }
811 offset = buf - page_address(page);
812 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
813 put_page(page);
814 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
815 len, truesize);
816 } else {
817 skb_add_rx_frag(curr_skb, num_skb_frags, page,
818 offset, len, truesize);
819 }
820 }
821
822 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
823 return head_skb;
824
825 err_xdp:
826 rcu_read_unlock();
827 err_skb:
828 put_page(page);
829 while (num_buf-- > 1) {
830 buf = virtqueue_get_buf(rq->vq, &len);
831 if (unlikely(!buf)) {
832 pr_debug("%s: rx error: %d buffers missing\n",
833 dev->name, num_buf);
834 dev->stats.rx_length_errors++;
835 break;
836 }
837 page = virt_to_head_page(buf);
838 put_page(page);
839 }
840 err_buf:
841 dev->stats.rx_dropped++;
842 dev_kfree_skb(head_skb);
843 xdp_xmit:
844 return NULL;
845 }
846
847 static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
848 void *buf, unsigned int len, void **ctx, bool *xdp_xmit)
849 {
850 struct net_device *dev = vi->dev;
851 struct sk_buff *skb;
852 struct virtio_net_hdr_mrg_rxbuf *hdr;
853 int ret;
854
855 if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
856 pr_debug("%s: short packet %i\n", dev->name, len);
857 dev->stats.rx_length_errors++;
858 if (vi->mergeable_rx_bufs) {
859 put_page(virt_to_head_page(buf));
860 } else if (vi->big_packets) {
861 give_pages(rq, buf);
862 } else {
863 put_page(virt_to_head_page(buf));
864 }
865 return 0;
866 }
867
868 if (vi->mergeable_rx_bufs)
869 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit);
870 else if (vi->big_packets)
871 skb = receive_big(dev, vi, rq, buf, len);
872 else
873 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit);
874
875 if (unlikely(!skb))
876 return 0;
877
878 hdr = skb_vnet_hdr(skb);
879
880 ret = skb->len;
881
882 if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
883 skb->ip_summed = CHECKSUM_UNNECESSARY;
884
885 if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
886 virtio_is_little_endian(vi->vdev))) {
887 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
888 dev->name, hdr->hdr.gso_type,
889 hdr->hdr.gso_size);
890 goto frame_err;
891 }
892
893 skb->protocol = eth_type_trans(skb, dev);
894 pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
895 ntohs(skb->protocol), skb->len, skb->pkt_type);
896
897 napi_gro_receive(&rq->napi, skb);
898 return ret;
899
900 frame_err:
901 dev->stats.rx_frame_errors++;
902 dev_kfree_skb(skb);
903 return 0;
904 }
905
906 /* Unlike mergeable buffers, all buffers are allocated to the
907 * same size, except for the headroom. For this reason we do
908 * not need to use mergeable_len_to_ctx here - it is enough
909 * to store the headroom as the context ignoring the truesize.
910 */
911 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
912 gfp_t gfp)
913 {
914 struct page_frag *alloc_frag = &rq->alloc_frag;
915 char *buf;
916 unsigned int xdp_headroom = virtnet_get_headroom(vi);
917 void *ctx = (void *)(unsigned long)xdp_headroom;
918 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
919 int err;
920
921 len = SKB_DATA_ALIGN(len) +
922 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
923 if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
924 return -ENOMEM;
925
926 buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
927 get_page(alloc_frag->page);
928 alloc_frag->offset += len;
929 sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom,
930 vi->hdr_len + GOOD_PACKET_LEN);
931 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
932 if (err < 0)
933 put_page(virt_to_head_page(buf));
934 return err;
935 }
936
937 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
938 gfp_t gfp)
939 {
940 struct page *first, *list = NULL;
941 char *p;
942 int i, err, offset;
943
944 sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
945
946 /* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
947 for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
948 first = get_a_page(rq, gfp);
949 if (!first) {
950 if (list)
951 give_pages(rq, list);
952 return -ENOMEM;
953 }
954 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
955
956 /* chain new page in list head to match sg */
957 first->private = (unsigned long)list;
958 list = first;
959 }
960
961 first = get_a_page(rq, gfp);
962 if (!first) {
963 give_pages(rq, list);
964 return -ENOMEM;
965 }
966 p = page_address(first);
967
968 /* rq->sg[0], rq->sg[1] share the same page */
969 /* a separate rq->sg[0] for the header - required in case !any_header_sg */
970 sg_set_buf(&rq->sg[0], p, vi->hdr_len);
971
972 /* rq->sg[1] for data packet, from offset */
973 offset = sizeof(struct padded_vnet_hdr);
974 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
975
976 /* chain first in list head */
977 first->private = (unsigned long)list;
978 err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2,
979 first, gfp);
980 if (err < 0)
981 give_pages(rq, first);
982
983 return err;
984 }
985
986 static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
987 struct ewma_pkt_len *avg_pkt_len)
988 {
989 const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
990 unsigned int len;
991
992 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
993 rq->min_buf_len, PAGE_SIZE - hdr_len);
994 return ALIGN(len, L1_CACHE_BYTES);
995 }
996
997 static int add_recvbuf_mergeable(struct virtnet_info *vi,
998 struct receive_queue *rq, gfp_t gfp)
999 {
1000 struct page_frag *alloc_frag = &rq->alloc_frag;
1001 unsigned int headroom = virtnet_get_headroom(vi);
1002 char *buf;
1003 void *ctx;
1004 int err;
1005 unsigned int len, hole;
1006
1007 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
1008 if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
1009 return -ENOMEM;
1010
1011 buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
1012 buf += headroom; /* advance address leaving hole at front of pkt */
1013 get_page(alloc_frag->page);
1014 alloc_frag->offset += len + headroom;
1015 hole = alloc_frag->size - alloc_frag->offset;
1016 if (hole < len + headroom) {
1017 /* To avoid internal fragmentation, if there is very likely not
1018 * enough space for another buffer, add the remaining space to
1019 * the current buffer.
1020 */
1021 len += hole;
1022 alloc_frag->offset += hole;
1023 }
1024
1025 sg_init_one(rq->sg, buf, len);
1026 ctx = mergeable_len_to_ctx(len, headroom);
1027 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
1028 if (err < 0)
1029 put_page(virt_to_head_page(buf));
1030
1031 return err;
1032 }
1033
1034 /*
1035 * Returns false if we couldn't fill entirely (OOM).
1036 *
1037 * Normally run in the receive path, but can also be run from ndo_open
1038 * before we're receiving packets, or from refill_work which is
1039 * careful to disable receiving (using napi_disable).
1040 */
1041 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
1042 gfp_t gfp)
1043 {
1044 int err;
1045 bool oom;
1046
1047 do {
1048 if (vi->mergeable_rx_bufs)
1049 err = add_recvbuf_mergeable(vi, rq, gfp);
1050 else if (vi->big_packets)
1051 err = add_recvbuf_big(vi, rq, gfp);
1052 else
1053 err = add_recvbuf_small(vi, rq, gfp);
1054
1055 oom = err == -ENOMEM;
1056 if (err)
1057 break;
1058 } while (rq->vq->num_free);
1059 virtqueue_kick(rq->vq);
1060 return !oom;
1061 }
1062
1063 static void skb_recv_done(struct virtqueue *rvq)
1064 {
1065 struct virtnet_info *vi = rvq->vdev->priv;
1066 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
1067
1068 virtqueue_napi_schedule(&rq->napi, rvq);
1069 }
1070
1071 static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
1072 {
1073 napi_enable(napi);
1074
1075 /* If all buffers were filled by the other side before we enabled NAPI, we
1076 * won't get another interrupt, so process any outstanding packets now.
1077 * Call local_bh_enable after to trigger softIRQ processing.
1078 */
1079 local_bh_disable();
1080 virtqueue_napi_schedule(napi, vq);
1081 local_bh_enable();
1082 }
1083
1084 static void virtnet_napi_tx_enable(struct virtnet_info *vi,
1085 struct virtqueue *vq,
1086 struct napi_struct *napi)
1087 {
1088 if (!napi->weight)
1089 return;
1090
1091 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only
1092 * enable the feature if this is likely affine with the transmit path.
1093 */
1094 if (!vi->affinity_hint_set) {
1095 napi->weight = 0;
1096 return;
1097 }
1098
1099 return virtnet_napi_enable(vq, napi);
1100 }
1101
1102 static void virtnet_napi_tx_disable(struct napi_struct *napi)
1103 {
1104 if (napi->weight)
1105 napi_disable(napi);
1106 }
1107
1108 static void refill_work(struct work_struct *work)
1109 {
1110 struct virtnet_info *vi =
1111 container_of(work, struct virtnet_info, refill.work);
1112 bool still_empty;
1113 int i;
1114
1115 for (i = 0; i < vi->curr_queue_pairs; i++) {
1116 struct receive_queue *rq = &vi->rq[i];
1117
1118 napi_disable(&rq->napi);
1119 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
1120 virtnet_napi_enable(rq->vq, &rq->napi);
1121
1122 /* In theory, this can happen: if we don't get any buffers in
1123 * we will *never* try to fill again.
1124 */
1125 if (still_empty)
1126 schedule_delayed_work(&vi->refill, HZ/2);
1127 }
1128 }
1129
1130 static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit)
1131 {
1132 struct virtnet_info *vi = rq->vq->vdev->priv;
1133 unsigned int len, received = 0, bytes = 0;
1134 void *buf;
1135 struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
1136
1137 if (!vi->big_packets || vi->mergeable_rx_bufs) {
1138 void *ctx;
1139
1140 while (received < budget &&
1141 (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
1142 bytes += receive_buf(vi, rq, buf, len, ctx, xdp_xmit);
1143 received++;
1144 }
1145 } else {
1146 while (received < budget &&
1147 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
1148 bytes += receive_buf(vi, rq, buf, len, NULL, xdp_xmit);
1149 received++;
1150 }
1151 }
1152
1153 if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
1154 if (!try_fill_recv(vi, rq, GFP_ATOMIC))
1155 schedule_delayed_work(&vi->refill, 0);
1156 }
1157
1158 u64_stats_update_begin(&stats->rx_syncp);
1159 stats->rx_bytes += bytes;
1160 stats->rx_packets += received;
1161 u64_stats_update_end(&stats->rx_syncp);
1162
1163 return received;
1164 }
1165
1166 static void free_old_xmit_skbs(struct send_queue *sq)
1167 {
1168 struct sk_buff *skb;
1169 unsigned int len;
1170 struct virtnet_info *vi = sq->vq->vdev->priv;
1171 struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
1172 unsigned int packets = 0;
1173 unsigned int bytes = 0;
1174
1175 while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
1176 pr_debug("Sent skb %p\n", skb);
1177
1178 bytes += skb->len;
1179 packets++;
1180
1181 dev_consume_skb_any(skb);
1182 }
1183
1184 /* Avoid overhead when no packets have been processed; this
1185 * happens when called speculatively from start_xmit.
1186 */
1187 if (!packets)
1188 return;
1189
1190 u64_stats_update_begin(&stats->tx_syncp);
1191 stats->tx_bytes += bytes;
1192 stats->tx_packets += packets;
1193 u64_stats_update_end(&stats->tx_syncp);
1194 }
1195
1196 static void virtnet_poll_cleantx(struct receive_queue *rq)
1197 {
1198 struct virtnet_info *vi = rq->vq->vdev->priv;
1199 unsigned int index = vq2rxq(rq->vq);
1200 struct send_queue *sq = &vi->sq[index];
1201 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
1202
1203 if (!sq->napi.weight)
1204 return;
1205
1206 if (__netif_tx_trylock(txq)) {
1207 free_old_xmit_skbs(sq);
1208 __netif_tx_unlock(txq);
1209 }
1210
1211 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
1212 netif_tx_wake_queue(txq);
1213 }
1214
1215 static int virtnet_poll(struct napi_struct *napi, int budget)
1216 {
1217 struct receive_queue *rq =
1218 container_of(napi, struct receive_queue, napi);
1219 struct virtnet_info *vi = rq->vq->vdev->priv;
1220 struct send_queue *sq;
1221 unsigned int received, qp;
1222 bool xdp_xmit = false;
1223
1224 virtnet_poll_cleantx(rq);
1225
1226 received = virtnet_receive(rq, budget, &xdp_xmit);
1227
1228 /* Out of packets? */
1229 if (received < budget)
1230 virtqueue_napi_complete(napi, rq->vq, received);
1231
1232 if (xdp_xmit) {
1233 qp = vi->curr_queue_pairs - vi->xdp_queue_pairs +
1234 smp_processor_id();
1235 sq = &vi->sq[qp];
1236 virtqueue_kick(sq->vq);
1237 xdp_do_flush_map();
1238 }
1239
1240 return received;
1241 }
1242
1243 static int virtnet_open(struct net_device *dev)
1244 {
1245 struct virtnet_info *vi = netdev_priv(dev);
1246 int i;
1247
1248 for (i = 0; i < vi->max_queue_pairs; i++) {
1249 if (i < vi->curr_queue_pairs)
1250 /* Make sure we have some buffers: if oom use wq. */
1251 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
1252 schedule_delayed_work(&vi->refill, 0);
1253 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
1254 virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
1255 }
1256
1257 return 0;
1258 }
1259
1260 static int virtnet_poll_tx(struct napi_struct *napi, int budget)
1261 {
1262 struct send_queue *sq = container_of(napi, struct send_queue, napi);
1263 struct virtnet_info *vi = sq->vq->vdev->priv;
1264 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq));
1265
1266 __netif_tx_lock(txq, raw_smp_processor_id());
1267 free_old_xmit_skbs(sq);
1268 __netif_tx_unlock(txq);
1269
1270 virtqueue_napi_complete(napi, sq->vq, 0);
1271
1272 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
1273 netif_tx_wake_queue(txq);
1274
1275 return 0;
1276 }
1277
1278 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
1279 {
1280 struct virtio_net_hdr_mrg_rxbuf *hdr;
1281 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
1282 struct virtnet_info *vi = sq->vq->vdev->priv;
1283 int num_sg;
1284 unsigned hdr_len = vi->hdr_len;
1285 bool can_push;
1286
1287 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
1288
1289 can_push = vi->any_header_sg &&
1290 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
1291 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
1292 /* Even if we can, don't push here yet as this would skew
1293 * csum_start offset below. */
1294 if (can_push)
1295 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
1296 else
1297 hdr = skb_vnet_hdr(skb);
1298
1299 if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
1300 virtio_is_little_endian(vi->vdev), false,
1301 0))
1302 BUG();
1303
1304 if (vi->mergeable_rx_bufs)
1305 hdr->num_buffers = 0;
1306
1307 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
1308 if (can_push) {
1309 __skb_push(skb, hdr_len);
1310 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
1311 if (unlikely(num_sg < 0))
1312 return num_sg;
1313 /* Pull header back to avoid skew in tx bytes calculations. */
1314 __skb_pull(skb, hdr_len);
1315 } else {
1316 sg_set_buf(sq->sg, hdr, hdr_len);
1317 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
1318 if (unlikely(num_sg < 0))
1319 return num_sg;
1320 num_sg++;
1321 }
1322 return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
1323 }
1324
1325 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
1326 {
1327 struct virtnet_info *vi = netdev_priv(dev);
1328 int qnum = skb_get_queue_mapping(skb);
1329 struct send_queue *sq = &vi->sq[qnum];
1330 int err;
1331 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
1332 bool kick = !skb->xmit_more;
1333 bool use_napi = sq->napi.weight;
1334
1335 /* Free up any pending old buffers before queueing new ones. */
1336 free_old_xmit_skbs(sq);
1337
1338 if (use_napi && kick)
1339 virtqueue_enable_cb_delayed(sq->vq);
1340
1341 /* timestamp packet in software */
1342 skb_tx_timestamp(skb);
1343
1344 /* Try to transmit */
1345 err = xmit_skb(sq, skb);
1346
1347 /* This should not happen! */
1348 if (unlikely(err)) {
1349 dev->stats.tx_fifo_errors++;
1350 if (net_ratelimit())
1351 dev_warn(&dev->dev,
1352 "Unexpected TXQ (%d) queue failure: %d\n", qnum, err);
1353 dev->stats.tx_dropped++;
1354 dev_kfree_skb_any(skb);
1355 return NETDEV_TX_OK;
1356 }
1357
1358 /* Don't wait up for transmitted skbs to be freed. */
1359 if (!use_napi) {
1360 skb_orphan(skb);
1361 nf_reset(skb);
1362 }
1363
1364 /* If running out of space, stop queue to avoid getting packets that we
1365 * are then unable to transmit.
1366 * An alternative would be to force queuing layer to requeue the skb by
1367 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
1368 * returned in a normal path of operation: it means that driver is not
1369 * maintaining the TX queue stop/start state properly, and causes
1370 * the stack to do a non-trivial amount of useless work.
1371 * Since most packets only take 1 or 2 ring slots, stopping the queue
1372 * early means 16 slots are typically wasted.
1373 */
1374 if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
1375 netif_stop_subqueue(dev, qnum);
1376 if (!use_napi &&
1377 unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
1378 /* More just got used, free them then recheck. */
1379 free_old_xmit_skbs(sq);
1380 if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
1381 netif_start_subqueue(dev, qnum);
1382 virtqueue_disable_cb(sq->vq);
1383 }
1384 }
1385 }
1386
1387 if (kick || netif_xmit_stopped(txq))
1388 virtqueue_kick(sq->vq);
1389
1390 return NETDEV_TX_OK;
1391 }
1392
1393 /*
1394 * Send command via the control virtqueue and check status. Commands
1395 * supported by the hypervisor, as indicated by feature bits, should
1396 * never fail unless improperly formatted.
1397 */
1398 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
1399 struct scatterlist *out)
1400 {
1401 struct scatterlist *sgs[4], hdr, stat;
1402 unsigned out_num = 0, tmp;
1403
1404 /* Caller should know better */
1405 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
1406
1407 vi->ctrl->status = ~0;
1408 vi->ctrl->hdr.class = class;
1409 vi->ctrl->hdr.cmd = cmd;
1410 /* Add header */
1411 sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
1412 sgs[out_num++] = &hdr;
1413
1414 if (out)
1415 sgs[out_num++] = out;
1416
1417 /* Add return status. */
1418 sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
1419 sgs[out_num] = &stat;
1420
1421 BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
1422 virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
1423
1424 if (unlikely(!virtqueue_kick(vi->cvq)))
1425 return vi->ctrl->status == VIRTIO_NET_OK;
1426
1427 /* Spin for a response; the kick causes an ioport write, trapping
1428 * into the hypervisor, so the request should be handled immediately.
1429 */
1430 while (!virtqueue_get_buf(vi->cvq, &tmp) &&
1431 !virtqueue_is_broken(vi->cvq))
1432 cpu_relax();
1433
1434 return vi->ctrl->status == VIRTIO_NET_OK;
1435 }
1436
1437 static int virtnet_set_mac_address(struct net_device *dev, void *p)
1438 {
1439 struct virtnet_info *vi = netdev_priv(dev);
1440 struct virtio_device *vdev = vi->vdev;
1441 int ret;
1442 struct sockaddr *addr;
1443 struct scatterlist sg;
1444
1445 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
1446 if (!addr)
1447 return -ENOMEM;
1448
1449 ret = eth_prepare_mac_addr_change(dev, addr);
1450 if (ret)
1451 goto out;
1452
1453 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1454 sg_init_one(&sg, addr->sa_data, dev->addr_len);
1455 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
1456 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
1457 dev_warn(&vdev->dev,
1458 "Failed to set mac address by vq command.\n");
1459 ret = -EINVAL;
1460 goto out;
1461 }
1462 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
1463 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1464 unsigned int i;
1465
1466 /* Naturally, this has an atomicity problem. */
1467 for (i = 0; i < dev->addr_len; i++)
1468 virtio_cwrite8(vdev,
1469 offsetof(struct virtio_net_config, mac) +
1470 i, addr->sa_data[i]);
1471 }
1472
1473 eth_commit_mac_addr_change(dev, p);
1474 ret = 0;
1475
1476 out:
1477 kfree(addr);
1478 return ret;
1479 }
1480
1481 static void virtnet_stats(struct net_device *dev,
1482 struct rtnl_link_stats64 *tot)
1483 {
1484 struct virtnet_info *vi = netdev_priv(dev);
1485 int cpu;
1486 unsigned int start;
1487
1488 for_each_possible_cpu(cpu) {
1489 struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
1490 u64 tpackets, tbytes, rpackets, rbytes;
1491
1492 do {
1493 start = u64_stats_fetch_begin_irq(&stats->tx_syncp);
1494 tpackets = stats->tx_packets;
1495 tbytes = stats->tx_bytes;
1496 } while (u64_stats_fetch_retry_irq(&stats->tx_syncp, start));
1497
1498 do {
1499 start = u64_stats_fetch_begin_irq(&stats->rx_syncp);
1500 rpackets = stats->rx_packets;
1501 rbytes = stats->rx_bytes;
1502 } while (u64_stats_fetch_retry_irq(&stats->rx_syncp, start));
1503
1504 tot->rx_packets += rpackets;
1505 tot->tx_packets += tpackets;
1506 tot->rx_bytes += rbytes;
1507 tot->tx_bytes += tbytes;
1508 }
1509
1510 tot->tx_dropped = dev->stats.tx_dropped;
1511 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
1512 tot->rx_dropped = dev->stats.rx_dropped;
1513 tot->rx_length_errors = dev->stats.rx_length_errors;
1514 tot->rx_frame_errors = dev->stats.rx_frame_errors;
1515 }
1516
1517 #ifdef CONFIG_NET_POLL_CONTROLLER
1518 static void virtnet_netpoll(struct net_device *dev)
1519 {
1520 struct virtnet_info *vi = netdev_priv(dev);
1521 int i;
1522
1523 for (i = 0; i < vi->curr_queue_pairs; i++)
1524 napi_schedule(&vi->rq[i].napi);
1525 }
1526 #endif
1527
1528 static void virtnet_ack_link_announce(struct virtnet_info *vi)
1529 {
1530 rtnl_lock();
1531 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
1532 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
1533 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
1534 rtnl_unlock();
1535 }
1536
1537 static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
1538 {
1539 struct scatterlist sg;
1540 struct net_device *dev = vi->dev;
1541
1542 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
1543 return 0;
1544
1545 vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
1546 sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq));
1547
1548 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
1549 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
1550 dev_warn(&dev->dev, "Failed to set the number of queue pairs to %d\n",
1551 queue_pairs);
1552 return -EINVAL;
1553 } else {
1554 vi->curr_queue_pairs = queue_pairs;
1555 /* virtnet_open() will refill when the device is brought up. */
1556 if (dev->flags & IFF_UP)
1557 schedule_delayed_work(&vi->refill, 0);
1558 }
1559
1560 return 0;
1561 }
1562
1563 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
1564 {
1565 int err;
1566
1567 rtnl_lock();
1568 err = _virtnet_set_queues(vi, queue_pairs);
1569 rtnl_unlock();
1570 return err;
1571 }
1572
1573 static int virtnet_close(struct net_device *dev)
1574 {
1575 struct virtnet_info *vi = netdev_priv(dev);
1576 int i;
1577
1578 /* Make sure refill_work doesn't re-enable napi! */
1579 cancel_delayed_work_sync(&vi->refill);
1580
1581 for (i = 0; i < vi->max_queue_pairs; i++) {
1582 napi_disable(&vi->rq[i].napi);
1583 virtnet_napi_tx_disable(&vi->sq[i].napi);
1584 }
1585
1586 return 0;
1587 }
1588
1589 static void virtnet_set_rx_mode(struct net_device *dev)
1590 {
1591 struct virtnet_info *vi = netdev_priv(dev);
1592 struct scatterlist sg[2];
1593 struct virtio_net_ctrl_mac *mac_data;
1594 struct netdev_hw_addr *ha;
1595 int uc_count;
1596 int mc_count;
1597 void *buf;
1598 int i;
1599
1600 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */
1601 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
1602 return;
1603
1604 vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
1605 vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
1606
1607 sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc));
1608
1609 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
1610 VIRTIO_NET_CTRL_RX_PROMISC, sg))
1611 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
1612 vi->ctrl->promisc ? "en" : "dis");
1613
1614 sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti));
1615
1616 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
1617 VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
1618 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
1619 vi->ctrl->allmulti ? "en" : "dis");
1620
1621 uc_count = netdev_uc_count(dev);
1622 mc_count = netdev_mc_count(dev);
1623 /* MAC filter - use one buffer for both lists */
1624 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
1625 (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
1626 mac_data = buf;
1627 if (!buf)
1628 return;
1629
1630 sg_init_table(sg, 2);
1631
1632 /* Store the unicast list and count in the front of the buffer */
1633 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count);
1634 i = 0;
1635 netdev_for_each_uc_addr(ha, dev)
1636 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
1637
1638 sg_set_buf(&sg[0], mac_data,
1639 sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
1640
1641 /* multicast list and count fill the end */
1642 mac_data = (void *)&mac_data->macs[uc_count][0];
1643
1644 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count);
1645 i = 0;
1646 netdev_for_each_mc_addr(ha, dev)
1647 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
1648
1649 sg_set_buf(&sg[1], mac_data,
1650 sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
1651
1652 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
1653 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
1654 dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
1655
1656 kfree(buf);
1657 }
1658
1659 static int virtnet_vlan_rx_add_vid(struct net_device *dev,
1660 __be16 proto, u16 vid)
1661 {
1662 struct virtnet_info *vi = netdev_priv(dev);
1663 struct scatterlist sg;
1664
1665 vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid);
1666 sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
1667
1668 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1669 VIRTIO_NET_CTRL_VLAN_ADD, &sg))
1670 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
1671 return 0;
1672 }
1673
1674 static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
1675 __be16 proto, u16 vid)
1676 {
1677 struct virtnet_info *vi = netdev_priv(dev);
1678 struct scatterlist sg;
1679
1680 vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid);
1681 sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
1682
1683 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1684 VIRTIO_NET_CTRL_VLAN_DEL, &sg))
1685 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
1686 return 0;
1687 }
1688
1689 static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
1690 {
1691 int i;
1692
1693 if (vi->affinity_hint_set) {
1694 for (i = 0; i < vi->max_queue_pairs; i++) {
1695 virtqueue_set_affinity(vi->rq[i].vq, -1);
1696 virtqueue_set_affinity(vi->sq[i].vq, -1);
1697 }
1698
1699 vi->affinity_hint_set = false;
1700 }
1701 }
1702
1703 static void virtnet_set_affinity(struct virtnet_info *vi)
1704 {
1705 int i;
1706 int cpu;
1707
1708 /* In multiqueue mode, when the number of CPUs is equal to the number of
1709 * queue pairs, we let each queue pair be private to one CPU by
1710 * setting the affinity hint to eliminate the contention.
1711 */
1712 if (vi->curr_queue_pairs == 1 ||
1713 vi->max_queue_pairs != num_online_cpus()) {
1714 virtnet_clean_affinity(vi, -1);
1715 return;
1716 }
1717
1718 i = 0;
1719 for_each_online_cpu(cpu) {
1720 virtqueue_set_affinity(vi->rq[i].vq, cpu);
1721 virtqueue_set_affinity(vi->sq[i].vq, cpu);
1722 netif_set_xps_queue(vi->dev, cpumask_of(cpu), i);
1723 i++;
1724 }
1725
1726 vi->affinity_hint_set = true;
1727 }
1728
1729 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
1730 {
1731 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
1732 node);
1733 virtnet_set_affinity(vi);
1734 return 0;
1735 }
1736
1737 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
1738 {
1739 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
1740 node_dead);
1741 virtnet_set_affinity(vi);
1742 return 0;
1743 }
1744
1745 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
1746 {
1747 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
1748 node);
1749
1750 virtnet_clean_affinity(vi, cpu);
1751 return 0;
1752 }
1753
1754 static enum cpuhp_state virtionet_online;
1755
1756 static int virtnet_cpu_notif_add(struct virtnet_info *vi)
1757 {
1758 int ret;
1759
1760 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);
1761 if (ret)
1762 return ret;
1763 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,
1764 &vi->node_dead);
1765 if (!ret)
1766 return ret;
1767 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
1768 return ret;
1769 }
1770
1771 static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
1772 {
1773 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
1774 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD,
1775 &vi->node_dead);
1776 }
1777
1778 static void virtnet_get_ringparam(struct net_device *dev,
1779 struct ethtool_ringparam *ring)
1780 {
1781 struct virtnet_info *vi = netdev_priv(dev);
1782
1783 ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
1784 ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
1785 ring->rx_pending = ring->rx_max_pending;
1786 ring->tx_pending = ring->tx_max_pending;
1787 }
1788
1789
1790 static void virtnet_get_drvinfo(struct net_device *dev,
1791 struct ethtool_drvinfo *info)
1792 {
1793 struct virtnet_info *vi = netdev_priv(dev);
1794 struct virtio_device *vdev = vi->vdev;
1795
1796 strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
1797 strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
1798 strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
1799
1800 }
1801
1802 /* TODO: Eliminate OOO packets during switching */
1803 static int virtnet_set_channels(struct net_device *dev,
1804 struct ethtool_channels *channels)
1805 {
1806 struct virtnet_info *vi = netdev_priv(dev);
1807 u16 queue_pairs = channels->combined_count;
1808 int err;
1809
1810 /* We don't support separate rx/tx channels.
1811 * We don't allow setting 'other' channels.
1812 */
1813 if (channels->rx_count || channels->tx_count || channels->other_count)
1814 return -EINVAL;
1815
1816 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
1817 return -EINVAL;
1818
1819 /* For now we don't support modifying channels while XDP is loaded;
1820 * also, when XDP is loaded all RX queues have XDP programs, so we only
1821 * need to check a single RX queue.
1822 */
1823 if (vi->rq[0].xdp_prog)
1824 return -EINVAL;
1825
1826 get_online_cpus();
1827 err = _virtnet_set_queues(vi, queue_pairs);
1828 if (!err) {
1829 netif_set_real_num_tx_queues(dev, queue_pairs);
1830 netif_set_real_num_rx_queues(dev, queue_pairs);
1831
1832 virtnet_set_affinity(vi);
1833 }
1834 put_online_cpus();
1835
1836 return err;
1837 }
1838
1839 static void virtnet_get_channels(struct net_device *dev,
1840 struct ethtool_channels *channels)
1841 {
1842 struct virtnet_info *vi = netdev_priv(dev);
1843
1844 channels->combined_count = vi->curr_queue_pairs;
1845 channels->max_combined = vi->max_queue_pairs;
1846 channels->max_other = 0;
1847 channels->rx_count = 0;
1848 channels->tx_count = 0;
1849 channels->other_count = 0;
1850 }
1851
1852 /* Check if the user is trying to change anything besides speed/duplex */
1853 static bool
1854 virtnet_validate_ethtool_cmd(const struct ethtool_link_ksettings *cmd)
1855 {
1856 struct ethtool_link_ksettings diff1 = *cmd;
1857 struct ethtool_link_ksettings diff2 = {};
1858
1859 /* cmd is always set, so we need to clear it and validate the port type;
1860 * also, without autonegotiation we can ignore advertising
1861 */
1862 diff1.base.speed = 0;
1863 diff2.base.port = PORT_OTHER;
1864 ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
1865 diff1.base.duplex = 0;
1866 diff1.base.cmd = 0;
1867 diff1.base.link_mode_masks_nwords = 0;
1868
1869 return !memcmp(&diff1.base, &diff2.base, sizeof(diff1.base)) &&
1870 bitmap_empty(diff1.link_modes.supported,
1871 __ETHTOOL_LINK_MODE_MASK_NBITS) &&
1872 bitmap_empty(diff1.link_modes.advertising,
1873 __ETHTOOL_LINK_MODE_MASK_NBITS) &&
1874 bitmap_empty(diff1.link_modes.lp_advertising,
1875 __ETHTOOL_LINK_MODE_MASK_NBITS);
1876 }
1877
1878 static int virtnet_set_link_ksettings(struct net_device *dev,
1879 const struct ethtool_link_ksettings *cmd)
1880 {
1881 struct virtnet_info *vi = netdev_priv(dev);
1882 u32 speed;
1883
1884 speed = cmd->base.speed;
1885 /* don't allow custom speed and duplex */
1886 if (!ethtool_validate_speed(speed) ||
1887 !ethtool_validate_duplex(cmd->base.duplex) ||
1888 !virtnet_validate_ethtool_cmd(cmd))
1889 return -EINVAL;
1890 vi->speed = speed;
1891 vi->duplex = cmd->base.duplex;
1892
1893 return 0;
1894 }
1895
1896 static int virtnet_get_link_ksettings(struct net_device *dev,
1897 struct ethtool_link_ksettings *cmd)
1898 {
1899 struct virtnet_info *vi = netdev_priv(dev);
1900
1901 cmd->base.speed = vi->speed;
1902 cmd->base.duplex = vi->duplex;
1903 cmd->base.port = PORT_OTHER;
1904
1905 return 0;
1906 }
1907
1908 static void virtnet_init_settings(struct net_device *dev)
1909 {
1910 struct virtnet_info *vi = netdev_priv(dev);
1911
1912 vi->speed = SPEED_UNKNOWN;
1913 vi->duplex = DUPLEX_UNKNOWN;
1914 }
1915
1916 static const struct ethtool_ops virtnet_ethtool_ops = {
1917 .get_drvinfo = virtnet_get_drvinfo,
1918 .get_link = ethtool_op_get_link,
1919 .get_ringparam = virtnet_get_ringparam,
1920 .set_channels = virtnet_set_channels,
1921 .get_channels = virtnet_get_channels,
1922 .get_ts_info = ethtool_op_get_ts_info,
1923 .get_link_ksettings = virtnet_get_link_ksettings,
1924 .set_link_ksettings = virtnet_set_link_ksettings,
1925 };
1926
1927 static void virtnet_freeze_down(struct virtio_device *vdev)
1928 {
1929 struct virtnet_info *vi = vdev->priv;
1930 int i;
1931
1932 /* Make sure no work handler is accessing the device */
1933 flush_work(&vi->config_work);
1934
1935 netif_tx_lock_bh(vi->dev);
1936 netif_device_detach(vi->dev);
1937 netif_tx_unlock_bh(vi->dev);
1938 cancel_delayed_work_sync(&vi->refill);
1939
1940 if (netif_running(vi->dev)) {
1941 for (i = 0; i < vi->max_queue_pairs; i++) {
1942 napi_disable(&vi->rq[i].napi);
1943 virtnet_napi_tx_disable(&vi->sq[i].napi);
1944 }
1945 }
1946 }
1947
1948 static int init_vqs(struct virtnet_info *vi);
1949
1950 static int virtnet_restore_up(struct virtio_device *vdev)
1951 {
1952 struct virtnet_info *vi = vdev->priv;
1953 int err, i;
1954
1955 err = init_vqs(vi);
1956 if (err)
1957 return err;
1958
1959 virtio_device_ready(vdev);
1960
1961 if (netif_running(vi->dev)) {
1962 for (i = 0; i < vi->curr_queue_pairs; i++)
1963 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
1964 schedule_delayed_work(&vi->refill, 0);
1965
1966 for (i = 0; i < vi->max_queue_pairs; i++) {
1967 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
1968 virtnet_napi_tx_enable(vi, vi->sq[i].vq,
1969 &vi->sq[i].napi);
1970 }
1971 }
1972
1973 netif_tx_lock_bh(vi->dev);
1974 netif_device_attach(vi->dev);
1975 netif_tx_unlock_bh(vi->dev);
1976 return err;
1977 }
1978
1979 static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
1980 {
1981 struct scatterlist sg;
1982 vi->ctrl->offloads = cpu_to_virtio64(vi->vdev, offloads);
1983
1984 sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads));
1985
1986 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
1987 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
1988 dev_warn(&vi->dev->dev, "Failed to set guest offloads.\n");
1989 return -EINVAL;
1990 }
1991
1992 return 0;
1993 }
1994
1995 static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
1996 {
1997 u64 offloads = 0;
1998
1999 if (!vi->guest_offloads)
2000 return 0;
2001
2002 return virtnet_set_guest_offloads(vi, offloads);
2003 }
2004
2005 static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
2006 {
2007 u64 offloads = vi->guest_offloads;
2008
2009 if (!vi->guest_offloads)
2010 return 0;
2011
2012 return virtnet_set_guest_offloads(vi, offloads);
2013 }
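/* Illustrative sketch (assumed values, not part of this driver):
 * vi->guest_offloads caches the negotiated guest offload feature bits (see
 * guest_offloads[] and virtnet_probe()), so the value sent via
 * VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET is either that bitmap or 0.
 *
 *   vi->guest_offloads = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 *                        (1ULL << VIRTIO_NET_F_GUEST_CSUM);
 *   virtnet_clear_guest_offloads(vi);    // sends 0 (used when XDP attaches)
 *   virtnet_restore_guest_offloads(vi);  // sends the cached bitmap back
 */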
2014
2015 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
2016 struct netlink_ext_ack *extack)
2017 {
2018 unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
2019 struct virtnet_info *vi = netdev_priv(dev);
2020 struct bpf_prog *old_prog;
2021 u16 xdp_qp = 0, curr_qp;
2022 int i, err;
2023
2024 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
2025 && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
2026 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
2027 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
2028 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
2029 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
2030 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
2031 return -EOPNOTSUPP;
2032 }
2033
2034 if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
2035 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
2036 return -EINVAL;
2037 }
2038
2039 if (dev->mtu > max_sz) {
2040 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
2041 netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
2042 return -EINVAL;
2043 }
2044
2045 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
2046 if (prog)
2047 xdp_qp = nr_cpu_ids;
2048
2049 /* XDP requires extra queues for XDP_TX */
2050 if (curr_qp + xdp_qp > vi->max_queue_pairs) {
2051 NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
2052 netdev_warn(dev, "request %i queues but max is %i\n",
2053 curr_qp + xdp_qp, vi->max_queue_pairs);
2054 return -ENOMEM;
2055 }
2056
2057 if (prog) {
2058 prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
2059 if (IS_ERR(prog))
2060 return PTR_ERR(prog);
2061 }
2062
2063 /* Make sure NAPI is not using any XDP TX queues for RX. */
2064 if (netif_running(dev))
2065 for (i = 0; i < vi->max_queue_pairs; i++)
2066 napi_disable(&vi->rq[i].napi);
2067
2068 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
2069 err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
2070 if (err)
2071 goto err;
2072 vi->xdp_queue_pairs = xdp_qp;
2073
2074 for (i = 0; i < vi->max_queue_pairs; i++) {
2075 old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
2076 rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
2077 if (i == 0) {
2078 if (!old_prog)
2079 virtnet_clear_guest_offloads(vi);
2080 if (!prog)
2081 virtnet_restore_guest_offloads(vi);
2082 }
2083 if (old_prog)
2084 bpf_prog_put(old_prog);
2085 if (netif_running(dev))
2086 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
2087 }
2088
2089 return 0;
2090
2091 err:
2092 for (i = 0; i < vi->max_queue_pairs; i++)
2093 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
2094 if (prog)
2095 bpf_prog_sub(prog, vi->max_queue_pairs - 1);
2096 return err;
2097 }
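/* Usage sketch (illustrative, not part of this driver): virtnet_xdp_set() is
 * reached through the generic XDP netlink path via .ndo_bpf below.  Assuming
 * an interface eth0 and an object file xdp_prog.o (both assumptions):
 *
 *   ip link set dev eth0 xdp obj xdp_prog.o   # attach
 *   ip link set dev eth0 xdp off              # detach
 *
 * The attach fails with -EOPNOTSUPP when guest offloads are negotiated but
 * not controllable via VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, and with -ENOMEM
 * when there are not enough spare queue pairs for XDP_TX.
 */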
2098
2099 static u32 virtnet_xdp_query(struct net_device *dev)
2100 {
2101 struct virtnet_info *vi = netdev_priv(dev);
2102 const struct bpf_prog *xdp_prog;
2103 int i;
2104
2105 for (i = 0; i < vi->max_queue_pairs; i++) {
2106 xdp_prog = rtnl_dereference(vi->rq[i].xdp_prog);
2107 if (xdp_prog)
2108 return xdp_prog->aux->id;
2109 }
2110 return 0;
2111 }
2112
2113 static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
2114 {
2115 switch (xdp->command) {
2116 case XDP_SETUP_PROG:
2117 return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
2118 case XDP_QUERY_PROG:
2119 xdp->prog_id = virtnet_xdp_query(dev);
2120 xdp->prog_attached = !!xdp->prog_id;
2121 return 0;
2122 default:
2123 return -EINVAL;
2124 }
2125 }
2126
2127 static const struct net_device_ops virtnet_netdev = {
2128 .ndo_open = virtnet_open,
2129 .ndo_stop = virtnet_close,
2130 .ndo_start_xmit = start_xmit,
2131 .ndo_validate_addr = eth_validate_addr,
2132 .ndo_set_mac_address = virtnet_set_mac_address,
2133 .ndo_set_rx_mode = virtnet_set_rx_mode,
2134 .ndo_get_stats64 = virtnet_stats,
2135 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
2136 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
2137 #ifdef CONFIG_NET_POLL_CONTROLLER
2138 .ndo_poll_controller = virtnet_netpoll,
2139 #endif
2140 .ndo_bpf = virtnet_xdp,
2141 .ndo_xdp_xmit = virtnet_xdp_xmit,
2142 .ndo_xdp_flush = virtnet_xdp_flush,
2143 .ndo_features_check = passthru_features_check,
2144 };
2145
2146 static void virtnet_config_changed_work(struct work_struct *work)
2147 {
2148 struct virtnet_info *vi =
2149 container_of(work, struct virtnet_info, config_work);
2150 u16 v;
2151
2152 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
2153 struct virtio_net_config, status, &v) < 0)
2154 return;
2155
2156 if (v & VIRTIO_NET_S_ANNOUNCE) {
2157 netdev_notify_peers(vi->dev);
2158 virtnet_ack_link_announce(vi);
2159 }
2160
2161 /* Ignore unknown (future) status bits */
2162 v &= VIRTIO_NET_S_LINK_UP;
2163
2164 if (vi->status == v)
2165 return;
2166
2167 vi->status = v;
2168
2169 if (vi->status & VIRTIO_NET_S_LINK_UP) {
2170 netif_carrier_on(vi->dev);
2171 netif_tx_wake_all_queues(vi->dev);
2172 } else {
2173 netif_carrier_off(vi->dev);
2174 netif_tx_stop_all_queues(vi->dev);
2175 }
2176 }
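/* Illustrative note (not part of this driver): this work item runs whenever
 * the device signals a config change, e.g. when the hypervisor toggles the
 * administrative link state.  With QEMU, for instance, the monitor command
 * "set_link <nic-id> off" (nic-id depends on the VM definition) eventually
 * lands here and results in netif_carrier_off()/netif_tx_stop_all_queues().
 */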
2177
2178 static void virtnet_config_changed(struct virtio_device *vdev)
2179 {
2180 struct virtnet_info *vi = vdev->priv;
2181
2182 schedule_work(&vi->config_work);
2183 }
2184
2185 static void virtnet_free_queues(struct virtnet_info *vi)
2186 {
2187 int i;
2188
2189 for (i = 0; i < vi->max_queue_pairs; i++) {
2190 napi_hash_del(&vi->rq[i].napi);
2191 netif_napi_del(&vi->rq[i].napi);
2192 netif_napi_del(&vi->sq[i].napi);
2193 }
2194
2195 /* Since we called napi_hash_del() before netif_napi_del(),
2196 * we need to respect an RCU grace period before freeing vi->rq
2197 */
2198 synchronize_net();
2199
2200 kfree(vi->rq);
2201 kfree(vi->sq);
2202 kfree(vi->ctrl);
2203 }
2204
2205 static void _free_receive_bufs(struct virtnet_info *vi)
2206 {
2207 struct bpf_prog *old_prog;
2208 int i;
2209
2210 for (i = 0; i < vi->max_queue_pairs; i++) {
2211 while (vi->rq[i].pages)
2212 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
2213
2214 old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
2215 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
2216 if (old_prog)
2217 bpf_prog_put(old_prog);
2218 }
2219 }
2220
2221 static void free_receive_bufs(struct virtnet_info *vi)
2222 {
2223 rtnl_lock();
2224 _free_receive_bufs(vi);
2225 rtnl_unlock();
2226 }
2227
2228 static void free_receive_page_frags(struct virtnet_info *vi)
2229 {
2230 int i;
2231 for (i = 0; i < vi->max_queue_pairs; i++)
2232 if (vi->rq[i].alloc_frag.page)
2233 put_page(vi->rq[i].alloc_frag.page);
2234 }
2235
2236 static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
2237 {
2238 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
2239 return false;
2240 else if (q < vi->curr_queue_pairs)
2241 return true;
2242 else
2243 return false;
2244 }
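/* Illustrative note (not part of this driver): the XDP_TX queues are the
 * last xdp_queue_pairs send queues within curr_queue_pairs; their buffers
 * are raw pages rather than skbs, hence the distinction in
 * free_unused_bufs() below.  Worked example with assumed values
 * curr_queue_pairs = 4 and xdp_queue_pairs = 2: queues 0-1 carry skbs,
 * queues 2-3 are raw XDP buffer queues.
 */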
2245
2246 static void free_unused_bufs(struct virtnet_info *vi)
2247 {
2248 void *buf;
2249 int i;
2250
2251 for (i = 0; i < vi->max_queue_pairs; i++) {
2252 struct virtqueue *vq = vi->sq[i].vq;
2253 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
2254 if (!is_xdp_raw_buffer_queue(vi, i))
2255 dev_kfree_skb(buf);
2256 else
2257 put_page(virt_to_head_page(buf));
2258 }
2259 }
2260
2261 for (i = 0; i < vi->max_queue_pairs; i++) {
2262 struct virtqueue *vq = vi->rq[i].vq;
2263
2264 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
2265 if (vi->mergeable_rx_bufs) {
2266 put_page(virt_to_head_page(buf));
2267 } else if (vi->big_packets) {
2268 give_pages(&vi->rq[i], buf);
2269 } else {
2270 put_page(virt_to_head_page(buf));
2271 }
2272 }
2273 }
2274 }
2275
2276 static void virtnet_del_vqs(struct virtnet_info *vi)
2277 {
2278 struct virtio_device *vdev = vi->vdev;
2279
2280 virtnet_clean_affinity(vi, -1);
2281
2282 vdev->config->del_vqs(vdev);
2283
2284 virtnet_free_queues(vi);
2285 }
2286
2287 /* How large should a single buffer be so a queue full of these can fit at
2288 * least one full packet?
2289 * Logic below assumes the mergeable buffer header is used.
2290 */
2291 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
2292 {
2293 const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2294 unsigned int rq_size = virtqueue_get_vring_size(vq);
2295 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
2296 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
2297 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
2298
2299 return max(max(min_buf_len, hdr_len) - hdr_len,
2300 (unsigned int)GOOD_PACKET_LEN);
2301 }
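/* Worked example (assumed values, not part of this driver): for a device
 * that negotiated VIRTIO_NET_F_MTU with mtu = 1500 and no guest GSO
 * offloads (so big_packets is false and dev->max_mtu = 1500), with a
 * 256-entry RX ring:
 *
 *   hdr_len     = 12  (struct virtio_net_hdr_mrg_rxbuf)
 *   buf_len     = 12 + 14 + 4 + 1500 = 1530
 *   min_buf_len = DIV_ROUND_UP(1530, 256) = 6
 *   result      = max(max(6, 12) - 12, 1518) = 1518 = GOOD_PACKET_LEN
 *
 * i.e. the refill path never uses buffers smaller than a full Ethernet
 * frame plus the VLAN allowance.
 */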
2302
2303 static int virtnet_find_vqs(struct virtnet_info *vi)
2304 {
2305 vq_callback_t **callbacks;
2306 struct virtqueue **vqs;
2307 int ret = -ENOMEM;
2308 int i, total_vqs;
2309 const char **names;
2310 bool *ctx;
2311
2312 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
2313 * up to N-1 additional RX/TX queue pairs used in multiqueue mode,
2314 * followed by an optional control vq.
2315 */
2316 total_vqs = vi->max_queue_pairs * 2 +
2317 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
2318
2319 /* Allocate space for find_vqs parameters */
2320 vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
2321 if (!vqs)
2322 goto err_vq;
2323 callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
2324 if (!callbacks)
2325 goto err_callback;
2326 names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
2327 if (!names)
2328 goto err_names;
2329 if (!vi->big_packets || vi->mergeable_rx_bufs) {
2330 ctx = kzalloc(total_vqs * sizeof(*ctx), GFP_KERNEL);
2331 if (!ctx)
2332 goto err_ctx;
2333 } else {
2334 ctx = NULL;
2335 }
2336
2337 /* Parameters for control virtqueue, if any */
2338 if (vi->has_cvq) {
2339 callbacks[total_vqs - 1] = NULL;
2340 names[total_vqs - 1] = "control";
2341 }
2342
2343 /* Allocate/initialize parameters for send/receive virtqueues */
2344 for (i = 0; i < vi->max_queue_pairs; i++) {
2345 callbacks[rxq2vq(i)] = skb_recv_done;
2346 callbacks[txq2vq(i)] = skb_xmit_done;
2347 sprintf(vi->rq[i].name, "input.%d", i);
2348 sprintf(vi->sq[i].name, "output.%d", i);
2349 names[rxq2vq(i)] = vi->rq[i].name;
2350 names[txq2vq(i)] = vi->sq[i].name;
2351 if (ctx)
2352 ctx[rxq2vq(i)] = true;
2353 }
2354
2355 ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
2356 names, ctx, NULL);
2357 if (ret)
2358 goto err_find;
2359
2360 if (vi->has_cvq) {
2361 vi->cvq = vqs[total_vqs - 1];
2362 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
2363 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
2364 }
2365
2366 for (i = 0; i < vi->max_queue_pairs; i++) {
2367 vi->rq[i].vq = vqs[rxq2vq(i)];
2368 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
2369 vi->sq[i].vq = vqs[txq2vq(i)];
2370 }
2371
2372 kfree(names);
2373 kfree(callbacks);
2374 kfree(vqs);
2375 kfree(ctx);
2376
2377 return 0;
2378
2379 err_find:
2380 kfree(ctx);
2381 err_ctx:
2382 kfree(names);
2383 err_names:
2384 kfree(callbacks);
2385 err_callback:
2386 kfree(vqs);
2387 err_vq:
2388 return ret;
2389 }
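/* Illustrative note (not part of this driver): the resulting virtqueue
 * layout interleaves RX and TX per pair, with the control vq (if negotiated)
 * last.  Example for max_queue_pairs = 2 plus a control vq (total_vqs = 5):
 *
 *   vqs[0] = input.0    vqs[1] = output.0
 *   vqs[2] = input.1    vqs[3] = output.1
 *   vqs[4] = control
 */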
2390
2391 static int virtnet_alloc_queues(struct virtnet_info *vi)
2392 {
2393 int i;
2394
2395 vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
2396 if (!vi->ctrl)
2397 goto err_ctrl;
2398 vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL);
2399 if (!vi->sq)
2400 goto err_sq;
2401 vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL);
2402 if (!vi->rq)
2403 goto err_rq;
2404
2405 INIT_DELAYED_WORK(&vi->refill, refill_work);
2406 for (i = 0; i < vi->max_queue_pairs; i++) {
2407 vi->rq[i].pages = NULL;
2408 netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
2409 napi_weight);
2410 netif_tx_napi_add(vi->dev, &vi->sq[i].napi, virtnet_poll_tx,
2411 napi_tx ? napi_weight : 0);
2412
2413 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
2414 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
2415 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
2416 }
2417
2418 return 0;
2419
2420 err_rq:
2421 kfree(vi->sq);
2422 err_sq:
2423 kfree(vi->ctrl);
2424 err_ctrl:
2425 return -ENOMEM;
2426 }
2427
2428 static int init_vqs(struct virtnet_info *vi)
2429 {
2430 int ret;
2431
2432 /* Allocate send & receive queues */
2433 ret = virtnet_alloc_queues(vi);
2434 if (ret)
2435 goto err;
2436
2437 ret = virtnet_find_vqs(vi);
2438 if (ret)
2439 goto err_free;
2440
2441 get_online_cpus();
2442 virtnet_set_affinity(vi);
2443 put_online_cpus();
2444
2445 return 0;
2446
2447 err_free:
2448 virtnet_free_queues(vi);
2449 err:
2450 return ret;
2451 }
2452
2453 #ifdef CONFIG_SYSFS
2454 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
2455 char *buf)
2456 {
2457 struct virtnet_info *vi = netdev_priv(queue->dev);
2458 unsigned int queue_index = get_netdev_rx_queue_index(queue);
2459 struct ewma_pkt_len *avg;
2460
2461 BUG_ON(queue_index >= vi->max_queue_pairs);
2462 avg = &vi->rq[queue_index].mrg_avg_pkt_len;
2463 return sprintf(buf, "%u\n",
2464 get_mergeable_buf_len(&vi->rq[queue_index], avg));
2465 }
2466
2467 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
2468 __ATTR_RO(mergeable_rx_buffer_size);
2469
2470 static struct attribute *virtio_net_mrg_rx_attrs[] = {
2471 &mergeable_rx_buffer_size_attribute.attr,
2472 NULL
2473 };
2474
2475 static const struct attribute_group virtio_net_mrg_rx_group = {
2476 .name = "virtio_net",
2477 .attrs = virtio_net_mrg_rx_attrs
2478 };
2479 #endif
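/* Usage sketch (illustrative, not part of this driver): with CONFIG_SYSFS
 * and mergeable RX buffers, the attribute above is exposed per RX queue.
 * Assuming an interface named eth0 (an assumption):
 *
 *   cat /sys/class/net/eth0/queues/rx-0/virtio_net/mergeable_rx_buffer_size
 *
 * returns the EWMA-derived buffer size currently used when refilling that
 * ring.
 */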
2480
2481 static bool virtnet_fail_on_feature(struct virtio_device *vdev,
2482 unsigned int fbit,
2483 const char *fname, const char *dname)
2484 {
2485 if (!virtio_has_feature(vdev, fbit))
2486 return false;
2487
2488 dev_err(&vdev->dev, "device advertises feature %s but not %s",
2489 fname, dname);
2490
2491 return true;
2492 }
2493
2494 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \
2495 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)
2496
2497 static bool virtnet_validate_features(struct virtio_device *vdev)
2498 {
2499 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
2500 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
2501 "VIRTIO_NET_F_CTRL_VQ") ||
2502 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
2503 "VIRTIO_NET_F_CTRL_VQ") ||
2504 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
2505 "VIRTIO_NET_F_CTRL_VQ") ||
2506 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
2507 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
2508 "VIRTIO_NET_F_CTRL_VQ"))) {
2509 return false;
2510 }
2511
2512 return true;
2513 }
2514
2515 #define MIN_MTU ETH_MIN_MTU
2516 #define MAX_MTU ETH_MAX_MTU
2517
2518 static int virtnet_validate(struct virtio_device *vdev)
2519 {
2520 if (!vdev->config->get) {
2521 dev_err(&vdev->dev, "%s failure: config access disabled\n",
2522 __func__);
2523 return -EINVAL;
2524 }
2525
2526 if (!virtnet_validate_features(vdev))
2527 return -EINVAL;
2528
2529 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
2530 int mtu = virtio_cread16(vdev,
2531 offsetof(struct virtio_net_config,
2532 mtu));
2533 if (mtu < MIN_MTU)
2534 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
2535 }
2536
2537 return 0;
2538 }
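/* Illustrative note (not part of this driver): virtnet_validate() runs
 * before feature negotiation completes, so a device advertising
 * VIRTIO_NET_F_MTU with, say, mtu = 60 (below MIN_MTU = 68) simply has the
 * MTU feature cleared here and the driver falls back to its default MTU
 * limits instead of failing the probe.
 */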
2539
2540 static int virtnet_probe(struct virtio_device *vdev)
2541 {
2542 int i, err;
2543 struct net_device *dev;
2544 struct virtnet_info *vi;
2545 u16 max_queue_pairs;
2546 int mtu;
2547
2548 /* Find if host supports multiqueue virtio_net device */
2549 err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
2550 struct virtio_net_config,
2551 max_virtqueue_pairs, &max_queue_pairs);
2552
2553 /* We need at least 2 queues */
2554 if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
2555 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
2556 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
2557 max_queue_pairs = 1;
2558
2559 /* Allocate ourselves a network device with room for our info */
2560 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
2561 if (!dev)
2562 return -ENOMEM;
2563
2564 /* Set up network device as normal. */
2565 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
2566 dev->netdev_ops = &virtnet_netdev;
2567 dev->features = NETIF_F_HIGHDMA;
2568
2569 dev->ethtool_ops = &virtnet_ethtool_ops;
2570 SET_NETDEV_DEV(dev, &vdev->dev);
2571
2572 /* Do we support "hardware" checksums? */
2573 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
2574 /* This opens up the world of extra features. */
2575 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
2576 if (csum)
2577 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
2578
2579 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
2580 dev->hw_features |= NETIF_F_TSO
2581 | NETIF_F_TSO_ECN | NETIF_F_TSO6;
2582 }
2583 /* Individual feature bits: what can host handle? */
2584 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
2585 dev->hw_features |= NETIF_F_TSO;
2586 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
2587 dev->hw_features |= NETIF_F_TSO6;
2588 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
2589 dev->hw_features |= NETIF_F_TSO_ECN;
2590
2591 dev->features |= NETIF_F_GSO_ROBUST;
2592
2593 if (gso)
2594 dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
2595 /* (!csum && gso) case will be fixed by register_netdev() */
2596 }
2597 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
2598 dev->features |= NETIF_F_RXCSUM;
2599
2600 dev->vlan_features = dev->features;
2601
2602 /* MTU range: 68 - 65535 */
2603 dev->min_mtu = MIN_MTU;
2604 dev->max_mtu = MAX_MTU;
2605
2606 /* Configuration may specify what MAC to use. Otherwise random. */
2607 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
2608 virtio_cread_bytes(vdev,
2609 offsetof(struct virtio_net_config, mac),
2610 dev->dev_addr, dev->addr_len);
2611 else
2612 eth_hw_addr_random(dev);
2613
2614 /* Set up our device-specific information */
2615 vi = netdev_priv(dev);
2616 vi->dev = dev;
2617 vi->vdev = vdev;
2618 vdev->priv = vi;
2619 vi->stats = alloc_percpu(struct virtnet_stats);
2620 err = -ENOMEM;
2621 if (vi->stats == NULL)
2622 goto free;
2623
2624 for_each_possible_cpu(i) {
2625 struct virtnet_stats *virtnet_stats;
2626 virtnet_stats = per_cpu_ptr(vi->stats, i);
2627 u64_stats_init(&virtnet_stats->tx_syncp);
2628 u64_stats_init(&virtnet_stats->rx_syncp);
2629 }
2630
2631 INIT_WORK(&vi->config_work, virtnet_config_changed_work);
2632
2633 /* If we can receive ANY GSO packets, we must allocate large ones. */
2634 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
2635 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
2636 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) ||
2637 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO))
2638 vi->big_packets = true;
2639
2640 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
2641 vi->mergeable_rx_bufs = true;
2642
2643 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
2644 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
2645 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2646 else
2647 vi->hdr_len = sizeof(struct virtio_net_hdr);
2648
2649 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
2650 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
2651 vi->any_header_sg = true;
2652
2653 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
2654 vi->has_cvq = true;
2655
2656 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
2657 mtu = virtio_cread16(vdev,
2658 offsetof(struct virtio_net_config,
2659 mtu));
2660 if (mtu < dev->min_mtu) {
2661 /* Should never trigger: MTU was previously validated
2662 * in virtnet_validate.
2663 */
2664 dev_err(&vdev->dev, "device MTU appears to have changed, "
2665 "it is now %d < %d", mtu, dev->min_mtu);
2666 goto free_stats;
2667 }
2668
2669 dev->mtu = mtu;
2670 dev->max_mtu = mtu;
2671
2672 /* TODO: size buffers correctly in this case. */
2673 if (dev->mtu > ETH_DATA_LEN)
2674 vi->big_packets = true;
2675 }
2676
2677 if (vi->any_header_sg)
2678 dev->needed_headroom = vi->hdr_len;
2679
2680 /* Enable multiqueue by default */
2681 if (num_online_cpus() >= max_queue_pairs)
2682 vi->curr_queue_pairs = max_queue_pairs;
2683 else
2684 vi->curr_queue_pairs = num_online_cpus();
2685 vi->max_queue_pairs = max_queue_pairs;
2686
2687 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
2688 err = init_vqs(vi);
2689 if (err)
2690 goto free_stats;
2691
2692 #ifdef CONFIG_SYSFS
2693 if (vi->mergeable_rx_bufs)
2694 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
2695 #endif
2696 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
2697 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
2698
2699 virtnet_init_settings(dev);
2700
2701 err = register_netdev(dev);
2702 if (err) {
2703 pr_debug("virtio_net: registering device failed\n");
2704 goto free_vqs;
2705 }
2706
2707 virtio_device_ready(vdev);
2708
2709 err = virtnet_cpu_notif_add(vi);
2710 if (err) {
2711 pr_debug("virtio_net: registering cpu notifier failed\n");
2712 goto free_unregister_netdev;
2713 }
2714
2715 virtnet_set_queues(vi, vi->curr_queue_pairs);
2716
2717 /* Assume link up if device can't report link status,
2718 * otherwise get link status from config. */
2719 netif_carrier_off(dev);
2720 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
2721 schedule_work(&vi->config_work);
2722 } else {
2723 vi->status = VIRTIO_NET_S_LINK_UP;
2724 netif_carrier_on(dev);
2725 }
2726
2727 for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
2728 if (virtio_has_feature(vi->vdev, guest_offloads[i]))
2729 set_bit(guest_offloads[i], &vi->guest_offloads);
2730
2731 pr_debug("virtnet: registered device %s with %d RX and TX vqs\n",
2732 dev->name, max_queue_pairs);
2733
2734 return 0;
2735
2736 free_unregister_netdev:
2737 vi->vdev->config->reset(vdev);
2738
2739 unregister_netdev(dev);
2740 free_vqs:
2741 cancel_delayed_work_sync(&vi->refill);
2742 free_receive_page_frags(vi);
2743 virtnet_del_vqs(vi);
2744 free_stats:
2745 free_percpu(vi->stats);
2746 free:
2747 free_netdev(dev);
2748 return err;
2749 }
2750
2751 static void remove_vq_common(struct virtnet_info *vi)
2752 {
2753 vi->vdev->config->reset(vi->vdev);
2754
2755 /* Free unused buffers in both send and recv, if any. */
2756 free_unused_bufs(vi);
2757
2758 free_receive_bufs(vi);
2759
2760 free_receive_page_frags(vi);
2761
2762 virtnet_del_vqs(vi);
2763 }
2764
2765 static void virtnet_remove(struct virtio_device *vdev)
2766 {
2767 struct virtnet_info *vi = vdev->priv;
2768
2769 virtnet_cpu_notif_remove(vi);
2770
2771 /* Make sure no work handler is accessing the device. */
2772 flush_work(&vi->config_work);
2773
2774 unregister_netdev(vi->dev);
2775
2776 remove_vq_common(vi);
2777
2778 free_percpu(vi->stats);
2779 free_netdev(vi->dev);
2780 }
2781
2782 static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
2783 {
2784 struct virtnet_info *vi = vdev->priv;
2785
2786 virtnet_cpu_notif_remove(vi);
2787 virtnet_freeze_down(vdev);
2788 remove_vq_common(vi);
2789
2790 return 0;
2791 }
2792
2793 static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
2794 {
2795 struct virtnet_info *vi = vdev->priv;
2796 int err;
2797
2798 err = virtnet_restore_up(vdev);
2799 if (err)
2800 return err;
2801 virtnet_set_queues(vi, vi->curr_queue_pairs);
2802
2803 err = virtnet_cpu_notif_add(vi);
2804 if (err)
2805 return err;
2806
2807 return 0;
2808 }
2809
2810 static struct virtio_device_id id_table[] = {
2811 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
2812 { 0 },
2813 };
2814
2815 #define VIRTNET_FEATURES \
2816 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
2817 VIRTIO_NET_F_MAC, \
2818 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
2819 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
2820 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
2821 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
2822 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
2823 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
2824 VIRTIO_NET_F_CTRL_MAC_ADDR, \
2825 VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
2826
2827 static unsigned int features[] = {
2828 VIRTNET_FEATURES,
2829 };
2830
2831 static unsigned int features_legacy[] = {
2832 VIRTNET_FEATURES,
2833 VIRTIO_NET_F_GSO,
2834 VIRTIO_F_ANY_LAYOUT,
2835 };
2836
2837 static struct virtio_driver virtio_net_driver = {
2838 .feature_table = features,
2839 .feature_table_size = ARRAY_SIZE(features),
2840 .feature_table_legacy = features_legacy,
2841 .feature_table_size_legacy = ARRAY_SIZE(features_legacy),
2842 .driver.name = KBUILD_MODNAME,
2843 .driver.owner = THIS_MODULE,
2844 .id_table = id_table,
2845 .validate = virtnet_validate,
2846 .probe = virtnet_probe,
2847 .remove = virtnet_remove,
2848 .config_changed = virtnet_config_changed,
2849 #ifdef CONFIG_PM_SLEEP
2850 .freeze = virtnet_freeze,
2851 .restore = virtnet_restore,
2852 #endif
2853 };
2854
2855 static __init int virtio_net_driver_init(void)
2856 {
2857 int ret;
2858
2859 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
2860 virtnet_cpu_online,
2861 virtnet_cpu_down_prep);
2862 if (ret < 0)
2863 goto out;
2864 virtionet_online = ret;
2865 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
2866 NULL, virtnet_cpu_dead);
2867 if (ret)
2868 goto err_dead;
2869
2870 ret = register_virtio_driver(&virtio_net_driver);
2871 if (ret)
2872 goto err_virtio;
2873 return 0;
2874 err_virtio:
2875 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
2876 err_dead:
2877 cpuhp_remove_multi_state(virtionet_online);
2878 out:
2879 return ret;
2880 }
2881 module_init(virtio_net_driver_init);
2882
2883 static __exit void virtio_net_driver_exit(void)
2884 {
2885 unregister_virtio_driver(&virtio_net_driver);
2886 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
2887 cpuhp_remove_multi_state(virtionet_online);
2888 }
2889 module_exit(virtio_net_driver_exit);
2890
2891 MODULE_DEVICE_TABLE(virtio, id_table);
2892 MODULE_DESCRIPTION("Virtio network driver");
2893 MODULE_LICENSE("GPL");