virtio-net: pack headroom into ctx for mergeable buffers
[mirror_ubuntu-bionic-kernel.git] / drivers / net / virtio_net.c
1 /* A network driver using virtio.
2 *
3 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, see <http://www.gnu.org/licenses/>.
17 */
18 //#define DEBUG
19 #include <linux/netdevice.h>
20 #include <linux/etherdevice.h>
21 #include <linux/ethtool.h>
22 #include <linux/module.h>
23 #include <linux/virtio.h>
24 #include <linux/virtio_net.h>
25 #include <linux/bpf.h>
26 #include <linux/bpf_trace.h>
27 #include <linux/scatterlist.h>
28 #include <linux/if_vlan.h>
29 #include <linux/slab.h>
30 #include <linux/cpu.h>
31 #include <linux/average.h>
32 #include <net/route.h>
33
34 static int napi_weight = NAPI_POLL_WEIGHT;
35 module_param(napi_weight, int, 0444);
36
37 static bool csum = true, gso = true, napi_tx;
38 module_param(csum, bool, 0444);
39 module_param(gso, bool, 0444);
40 module_param(napi_tx, bool, 0644);
41
42 /* FIXME: MTU in config. */
43 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
44 #define GOOD_COPY_LEN 128
45
46 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
47
48 /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
49 #define VIRTIO_XDP_HEADROOM 256
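/* Illustrative sketch of the small-packet receive buffer layout built by
 * add_recvbuf_small() below (not authoritative, values depend on config):
 *
 *   [ VIRTNET_RX_PAD ][ XDP headroom (0 or 256) ][ virtio-net hdr ][ packet ]
 *
 * The area handed to the device starts at buf + VIRTNET_RX_PAD + headroom.
 */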
50
51 /* RX packet size EWMA. The average packet size is used to determine the packet
52 * buffer size when refilling RX rings. As the entire RX ring may be refilled
53 * at once, the weight is chosen so that the EWMA will be insensitive to short-
54 * term, transient changes in packet size.
55 */
56 DECLARE_EWMA(pkt_len, 0, 64)
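/* For illustration: with a weight reciprocal of 64 and zero extra precision,
 * each EWMA update is approximately
 *
 *   avg_new = avg_old - avg_old/64 + sample/64
 *
 * i.e. ~98.4% old value and ~1.6% new sample, so a short burst of unusual
 * packet sizes barely moves the average used to size mergeable buffers.
 */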
57
58 #define VIRTNET_DRIVER_VERSION "1.0.0"
59
60 struct virtnet_stats {
61 struct u64_stats_sync tx_syncp;
62 struct u64_stats_sync rx_syncp;
63 u64 tx_bytes;
64 u64 tx_packets;
65
66 u64 rx_bytes;
67 u64 rx_packets;
68 };
69
70 /* Internal representation of a send virtqueue */
71 struct send_queue {
72 /* Virtqueue associated with this send_queue */
73 struct virtqueue *vq;
74
75 /* TX: fragments + linear part + virtio header */
76 struct scatterlist sg[MAX_SKB_FRAGS + 2];
77
78 /* Name of the send queue: output.$index */
79 char name[40];
80
81 struct napi_struct napi;
82 };
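/* For illustration: the "+ 2" in sg[] above leaves room, in the worst case,
 * for one entry for the virtio-net header and one for the skb linear data,
 * in addition to up to MAX_SKB_FRAGS page fragments (see xmit_skb()).
 */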
83
84 /* Internal representation of a receive virtqueue */
85 struct receive_queue {
86 /* Virtqueue associated with this receive_queue */
87 struct virtqueue *vq;
88
89 struct napi_struct napi;
90
91 struct bpf_prog __rcu *xdp_prog;
92
93 /* Chain pages by the private ptr. */
94 struct page *pages;
95
96 /* Average packet length for mergeable receive buffers. */
97 struct ewma_pkt_len mrg_avg_pkt_len;
98
99 /* Page frag for packet buffer allocation. */
100 struct page_frag alloc_frag;
101
102 /* RX: fragments + linear part + virtio header */
103 struct scatterlist sg[MAX_SKB_FRAGS + 2];
104
105 /* Min single buffer size for mergeable buffers case. */
106 unsigned int min_buf_len;
107
108 /* Name of this receive queue: input.$index */
109 char name[40];
110 };
111
112 struct virtnet_info {
113 struct virtio_device *vdev;
114 struct virtqueue *cvq;
115 struct net_device *dev;
116 struct send_queue *sq;
117 struct receive_queue *rq;
118 unsigned int status;
119
120 /* Max # of queue pairs supported by the device */
121 u16 max_queue_pairs;
122
123 /* # of queue pairs currently used by the driver */
124 u16 curr_queue_pairs;
125
126 /* # of XDP queue pairs currently used by the driver */
127 u16 xdp_queue_pairs;
128
129 /* I like... big packets and I cannot lie! */
130 bool big_packets;
131
132 /* Host will merge rx buffers for big packets (shake it! shake it!) */
133 bool mergeable_rx_bufs;
134
135 /* Has control virtqueue */
136 bool has_cvq;
137
138 /* Host can handle any s/g split between our header and packet data */
139 bool any_header_sg;
140
141 /* Packet virtio header size */
142 u8 hdr_len;
143
144 /* Active statistics */
145 struct virtnet_stats __percpu *stats;
146
147 /* Work struct for refilling if we run low on memory. */
148 struct delayed_work refill;
149
150 /* Work struct for config space updates */
151 struct work_struct config_work;
152
153 /* Is the affinity hint set for virtqueues? */
154 bool affinity_hint_set;
155
156 /* CPU hotplug instances for online & dead */
157 struct hlist_node node;
158 struct hlist_node node_dead;
159
160 /* Control VQ buffers: protected by the rtnl lock */
161 struct virtio_net_ctrl_hdr ctrl_hdr;
162 virtio_net_ctrl_ack ctrl_status;
163 struct virtio_net_ctrl_mq ctrl_mq;
164 u8 ctrl_promisc;
165 u8 ctrl_allmulti;
166 u16 ctrl_vid;
167
168 /* Ethtool settings */
169 u8 duplex;
170 u32 speed;
171 };
172
173 struct padded_vnet_hdr {
174 struct virtio_net_hdr_mrg_rxbuf hdr;
175 /*
176 * hdr is in a separate sg buffer, and data sg buffer shares same page
177 * with this header sg. This padding makes next sg 16 byte aligned
178 * after the header.
179 */
180 char padding[4];
181 };
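/* Worked example: struct virtio_net_hdr_mrg_rxbuf is 12 bytes (a 10-byte
 * virtio_net_hdr plus a 16-bit num_buffers field), so the 4 bytes of padding
 * above bring the header sg entry to 16 bytes and keep the following data sg
 * 16-byte aligned.
 */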
182
183 /* Converting between virtqueue no. and kernel tx/rx queue no.
184 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
185 */
186 static int vq2txq(struct virtqueue *vq)
187 {
188 return (vq->index - 1) / 2;
189 }
190
191 static int txq2vq(int txq)
192 {
193 return txq * 2 + 1;
194 }
195
196 static int vq2rxq(struct virtqueue *vq)
197 {
198 return vq->index / 2;
199 }
200
201 static int rxq2vq(int rxq)
202 {
203 return rxq * 2;
204 }
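/* Worked example of the mapping above: queue pair 0 uses vqs 0 (rx) and 1 (tx),
 * queue pair 1 uses vqs 2 and 3, and so on. With M queue pairs in total, the
 * control vq, if present, sits at index 2M.
 */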
205
206 static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)
207 {
208 return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;
209 }
210
211 /*
212 * private is used to chain pages for big packets; put the whole
213 * most recently used list at the front for reuse
214 */
215 static void give_pages(struct receive_queue *rq, struct page *page)
216 {
217 struct page *end;
218
219 /* Find end of list, sew whole thing into vi->rq.pages. */
220 for (end = page; end->private; end = (struct page *)end->private);
221 end->private = (unsigned long)rq->pages;
222 rq->pages = page;
223 }
224
225 static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
226 {
227 struct page *p = rq->pages;
228
229 if (p) {
230 rq->pages = (struct page *)p->private;
231 /* clear private here, it is used to chain pages */
232 p->private = 0;
233 } else
234 p = alloc_page(gfp_mask);
235 return p;
236 }
237
238 static void virtqueue_napi_schedule(struct napi_struct *napi,
239 struct virtqueue *vq)
240 {
241 if (napi_schedule_prep(napi)) {
242 virtqueue_disable_cb(vq);
243 __napi_schedule(napi);
244 }
245 }
246
247 static void virtqueue_napi_complete(struct napi_struct *napi,
248 struct virtqueue *vq, int processed)
249 {
250 int opaque;
251
252 opaque = virtqueue_enable_cb_prepare(vq);
253 if (napi_complete_done(napi, processed) &&
254 unlikely(virtqueue_poll(vq, opaque)))
255 virtqueue_napi_schedule(napi, vq);
256 }
257
258 static void skb_xmit_done(struct virtqueue *vq)
259 {
260 struct virtnet_info *vi = vq->vdev->priv;
261 struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;
262
263 /* Suppress further interrupts. */
264 virtqueue_disable_cb(vq);
265
266 if (napi->weight)
267 virtqueue_napi_schedule(napi, vq);
268 else
269 /* We were probably waiting for more output buffers. */
270 netif_wake_subqueue(vi->dev, vq2txq(vq));
271 }
272
273 #define MRG_CTX_HEADER_SHIFT 22
274 static void *mergeable_len_to_ctx(unsigned int truesize,
275 unsigned int headroom)
276 {
277 return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
278 }
279
280 static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
281 {
282 return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
283 }
284
285 static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
286 {
287 return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
288 }
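/* Worked example (illustrative): a 1536-byte buffer posted with 256 bytes of
 * XDP headroom is encoded as ctx = (256 << 22) | 1536; decoding recovers
 * truesize = ctx & (2^22 - 1) = 1536 and headroom = ctx >> 22 = 256. Packing
 * both into the pointer-sized ctx works because truesize stays well below the
 * 4 MiB limit implied by MRG_CTX_HEADER_SHIFT.
 */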
289
290 /* Called from bottom half context */
291 static struct sk_buff *page_to_skb(struct virtnet_info *vi,
292 struct receive_queue *rq,
293 struct page *page, unsigned int offset,
294 unsigned int len, unsigned int truesize)
295 {
296 struct sk_buff *skb;
297 struct virtio_net_hdr_mrg_rxbuf *hdr;
298 unsigned int copy, hdr_len, hdr_padded_len;
299 char *p;
300
301 p = page_address(page) + offset;
302
303 /* copy small packet so we can reuse these pages for small data */
304 skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
305 if (unlikely(!skb))
306 return NULL;
307
308 hdr = skb_vnet_hdr(skb);
309
310 hdr_len = vi->hdr_len;
311 if (vi->mergeable_rx_bufs)
312 hdr_padded_len = sizeof *hdr;
313 else
314 hdr_padded_len = sizeof(struct padded_vnet_hdr);
315
316 memcpy(hdr, p, hdr_len);
317
318 len -= hdr_len;
319 offset += hdr_padded_len;
320 p += hdr_padded_len;
321
322 copy = len;
323 if (copy > skb_tailroom(skb))
324 copy = skb_tailroom(skb);
325 skb_put_data(skb, p, copy);
326
327 len -= copy;
328 offset += copy;
329
330 if (vi->mergeable_rx_bufs) {
331 if (len)
332 skb_add_rx_frag(skb, 0, page, offset, len, truesize);
333 else
334 put_page(page);
335 return skb;
336 }
337
338 /*
339 * Verify that we can indeed put this data into a skb.
340 * This is here to handle cases when the device erroneously
341 * tries to receive more than is possible. This is usually
342 * the case of a broken device.
343 */
344 if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
345 net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
346 dev_kfree_skb(skb);
347 return NULL;
348 }
349 BUG_ON(offset >= PAGE_SIZE);
350 while (len) {
351 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
352 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
353 frag_size, truesize);
354 len -= frag_size;
355 page = (struct page *)page->private;
356 offset = 0;
357 }
358
359 if (page)
360 give_pages(rq, page);
361
362 return skb;
363 }
364
365 static bool virtnet_xdp_xmit(struct virtnet_info *vi,
366 struct receive_queue *rq,
367 struct xdp_buff *xdp)
368 {
369 struct virtio_net_hdr_mrg_rxbuf *hdr;
370 unsigned int len;
371 struct send_queue *sq;
372 unsigned int qp;
373 void *xdp_sent;
374 int err;
375
376 qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
377 sq = &vi->sq[qp];
378
379 /* Free up any pending old buffers before queueing new ones. */
380 while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
381 struct page *sent_page = virt_to_head_page(xdp_sent);
382
383 put_page(sent_page);
384 }
385
386 xdp->data -= vi->hdr_len;
387 /* Zero header and leave csum up to XDP layers */
388 hdr = xdp->data;
389 memset(hdr, 0, vi->hdr_len);
390
391 sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
392
393 err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC);
394 if (unlikely(err)) {
395 struct page *page = virt_to_head_page(xdp->data);
396
397 put_page(page);
398 return false;
399 }
400
401 virtqueue_kick(sq->vq);
402 return true;
403 }
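/* For illustration: XDP_TX uses the per-cpu tx queues appended after the
 * regular ones. With 4 regular queue pairs, 4 CPUs and therefore
 * curr_queue_pairs = 8 and xdp_queue_pairs = 4, CPU 2 picks
 * qp = 8 - 4 + 2 = 6, i.e. the third of the four XDP-only tx queues.
 */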
404
405 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
406 {
407 return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
408 }
409
410 static struct sk_buff *receive_small(struct net_device *dev,
411 struct virtnet_info *vi,
412 struct receive_queue *rq,
413 void *buf, unsigned int len)
414 {
415 struct sk_buff *skb;
416 struct bpf_prog *xdp_prog;
417 unsigned int xdp_headroom = virtnet_get_headroom(vi);
418 unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
419 unsigned int headroom = vi->hdr_len + header_offset;
420 unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
421 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
422 unsigned int delta = 0;
423 len -= vi->hdr_len;
424
425 rcu_read_lock();
426 xdp_prog = rcu_dereference(rq->xdp_prog);
427 if (xdp_prog) {
428 struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
429 struct xdp_buff xdp;
430 void *orig_data;
431 u32 act;
432
433 if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
434 goto err_xdp;
435
436 xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
437 xdp.data = xdp.data_hard_start + xdp_headroom;
438 xdp.data_end = xdp.data + len;
439 orig_data = xdp.data;
440 act = bpf_prog_run_xdp(xdp_prog, &xdp);
441
442 switch (act) {
443 case XDP_PASS:
444 /* Recalculate length in case bpf program changed it */
445 delta = orig_data - xdp.data;
446 break;
447 case XDP_TX:
448 if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp)))
449 trace_xdp_exception(vi->dev, xdp_prog, act);
450 rcu_read_unlock();
451 goto xdp_xmit;
452 default:
453 bpf_warn_invalid_xdp_action(act);
454 case XDP_ABORTED:
455 trace_xdp_exception(vi->dev, xdp_prog, act);
456 case XDP_DROP:
457 goto err_xdp;
458 }
459 }
460 rcu_read_unlock();
461
462 skb = build_skb(buf, buflen);
463 if (!skb) {
464 put_page(virt_to_head_page(buf));
465 goto err;
466 }
467 skb_reserve(skb, headroom - delta);
468 skb_put(skb, len + delta);
469 if (!delta) {
470 buf += header_offset;
471 memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
472 } /* keep zeroed vnet hdr since packet was changed by bpf */
473
474 err:
475 return skb;
476
477 err_xdp:
478 rcu_read_unlock();
479 dev->stats.rx_dropped++;
480 put_page(virt_to_head_page(buf));
481 xdp_xmit:
482 return NULL;
483 }
484
485 static struct sk_buff *receive_big(struct net_device *dev,
486 struct virtnet_info *vi,
487 struct receive_queue *rq,
488 void *buf,
489 unsigned int len)
490 {
491 struct page *page = buf;
492 struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
493
494 if (unlikely(!skb))
495 goto err;
496
497 return skb;
498
499 err:
500 dev->stats.rx_dropped++;
501 give_pages(rq, page);
502 return NULL;
503 }
504
505 /* The conditions required to enable XDP should preclude the underlying device
506 * from sending packets across multiple buffers (num_buf > 1). However, per the
507 * spec this does not appear to be strictly illegal, merely against convention.
508 * So, to avoid making the system unresponsive, such packets are linearized into
509 * a single page and the XDP program is run on that copy. This is extremely slow,
510 * and we warn the user so it can be fixed as soon as possible. Fixing it may
511 * require debugging the underlying device/backend to determine why multiple
512 * buffers are being received, or simply loading the XDP program in the ingress
513 * stack after the skb is built, because there is no advantage to running it
514 * here anymore.
515 */
516 static struct page *xdp_linearize_page(struct receive_queue *rq,
517 u16 *num_buf,
518 struct page *p,
519 int offset,
520 unsigned int *len)
521 {
522 struct page *page = alloc_page(GFP_ATOMIC);
523 unsigned int page_off = VIRTIO_XDP_HEADROOM;
524
525 if (!page)
526 return NULL;
527
528 memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
529 page_off += *len;
530
531 while (--*num_buf) {
532 unsigned int buflen;
533 void *buf;
534 int off;
535
536 buf = virtqueue_get_buf(rq->vq, &buflen);
537 if (unlikely(!buf))
538 goto err_buf;
539
540 p = virt_to_head_page(buf);
541 off = buf - page_address(p);
542
543 /* guard against a misconfigured or uncooperative backend that
544 * is sending packets larger than the MTU.
545 */
546 if ((page_off + buflen) > PAGE_SIZE) {
547 put_page(p);
548 goto err_buf;
549 }
550
551 memcpy(page_address(page) + page_off,
552 page_address(p) + off, buflen);
553 page_off += buflen;
554 put_page(p);
555 }
556
557 /* Headroom does not contribute to packet length */
558 *len = page_off - VIRTIO_XDP_HEADROOM;
559 return page;
560 err_buf:
561 __free_pages(page, 0);
562 return NULL;
563 }
564
565 static struct sk_buff *receive_mergeable(struct net_device *dev,
566 struct virtnet_info *vi,
567 struct receive_queue *rq,
568 void *buf,
569 void *ctx,
570 unsigned int len)
571 {
572 struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
573 u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
574 struct page *page = virt_to_head_page(buf);
575 int offset = buf - page_address(page);
576 struct sk_buff *head_skb, *curr_skb;
577 struct bpf_prog *xdp_prog;
578 unsigned int truesize;
579
580 head_skb = NULL;
581
582 rcu_read_lock();
583 xdp_prog = rcu_dereference(rq->xdp_prog);
584 if (xdp_prog) {
585 struct page *xdp_page;
586 struct xdp_buff xdp;
587 void *data;
588 u32 act;
589
590 /* This happens when rx buffer size is underestimated */
591 if (unlikely(num_buf > 1)) {
592 /* linearize data for XDP */
593 xdp_page = xdp_linearize_page(rq, &num_buf,
594 page, offset, &len);
595 if (!xdp_page)
596 goto err_xdp;
597 offset = VIRTIO_XDP_HEADROOM;
598 } else {
599 xdp_page = page;
600 }
601
602 /* Transient failure which in theory could occur if
603 * in-flight packets from before XDP was enabled reach
604 * the receive path after XDP is loaded. In practice I
605 * was not able to create this condition.
606 */
607 if (unlikely(hdr->hdr.gso_type))
608 goto err_xdp;
609
610 /* Allow consuming headroom but reserve enough space to push
611 * the descriptor on if we get an XDP_TX return code.
612 */
613 data = page_address(xdp_page) + offset;
614 xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
615 xdp.data = data + vi->hdr_len;
616 xdp.data_end = xdp.data + (len - vi->hdr_len);
617 act = bpf_prog_run_xdp(xdp_prog, &xdp);
618
619 switch (act) {
620 case XDP_PASS:
621 /* recalculate offset to account for any header
622 * adjustments. Note other cases do not build an
623 * skb and avoid using offset
624 */
625 offset = xdp.data -
626 page_address(xdp_page) - vi->hdr_len;
627
628 /* We can only create skb based on xdp_page. */
629 if (unlikely(xdp_page != page)) {
630 rcu_read_unlock();
631 put_page(page);
632 head_skb = page_to_skb(vi, rq, xdp_page,
633 offset, len, PAGE_SIZE);
634 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
635 return head_skb;
636 }
637 break;
638 case XDP_TX:
639 if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp)))
640 trace_xdp_exception(vi->dev, xdp_prog, act);
641 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
642 if (unlikely(xdp_page != page))
643 goto err_xdp;
644 rcu_read_unlock();
645 goto xdp_xmit;
646 default:
647 bpf_warn_invalid_xdp_action(act);
648 case XDP_ABORTED:
649 trace_xdp_exception(vi->dev, xdp_prog, act);
650 case XDP_DROP:
651 if (unlikely(xdp_page != page))
652 __free_pages(xdp_page, 0);
653 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
654 goto err_xdp;
655 }
656 }
657 rcu_read_unlock();
658
659 truesize = mergeable_ctx_to_truesize(ctx);
660 if (unlikely(len > truesize)) {
661 pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
662 dev->name, len, (unsigned long)ctx);
663 dev->stats.rx_length_errors++;
664 goto err_skb;
665 }
666
667 head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
668 curr_skb = head_skb;
669
670 if (unlikely(!curr_skb))
671 goto err_skb;
672 while (--num_buf) {
673 int num_skb_frags;
674
675 buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
676 if (unlikely(!ctx)) {
677 pr_debug("%s: rx error: %d buffers out of %d missing\n",
678 dev->name, num_buf,
679 virtio16_to_cpu(vi->vdev,
680 hdr->num_buffers));
681 dev->stats.rx_length_errors++;
682 goto err_buf;
683 }
684
685 page = virt_to_head_page(buf);
686
687 truesize = mergeable_ctx_to_truesize(ctx);
688 if (unlikely(len > truesize)) {
689 pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
690 dev->name, len, (unsigned long)ctx);
691 dev->stats.rx_length_errors++;
692 goto err_skb;
693 }
694
695 num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
696 if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
697 struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
698
699 if (unlikely(!nskb))
700 goto err_skb;
701 if (curr_skb == head_skb)
702 skb_shinfo(curr_skb)->frag_list = nskb;
703 else
704 curr_skb->next = nskb;
705 curr_skb = nskb;
706 head_skb->truesize += nskb->truesize;
707 num_skb_frags = 0;
708 }
709 if (curr_skb != head_skb) {
710 head_skb->data_len += len;
711 head_skb->len += len;
712 head_skb->truesize += truesize;
713 }
714 offset = buf - page_address(page);
715 if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
716 put_page(page);
717 skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
718 len, truesize);
719 } else {
720 skb_add_rx_frag(curr_skb, num_skb_frags, page,
721 offset, len, truesize);
722 }
723 }
724
725 ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
726 return head_skb;
727
728 err_xdp:
729 rcu_read_unlock();
730 err_skb:
731 put_page(page);
732 while (--num_buf) {
733 buf = virtqueue_get_buf(rq->vq, &len);
734 if (unlikely(!buf)) {
735 pr_debug("%s: rx error: %d buffers missing\n",
736 dev->name, num_buf);
737 dev->stats.rx_length_errors++;
738 break;
739 }
740 page = virt_to_head_page(buf);
741 put_page(page);
742 }
743 err_buf:
744 dev->stats.rx_dropped++;
745 dev_kfree_skb(head_skb);
746 xdp_xmit:
747 return NULL;
748 }
749
750 static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
751 void *buf, unsigned int len, void **ctx)
752 {
753 struct net_device *dev = vi->dev;
754 struct sk_buff *skb;
755 struct virtio_net_hdr_mrg_rxbuf *hdr;
756 int ret;
757
758 if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
759 pr_debug("%s: short packet %i\n", dev->name, len);
760 dev->stats.rx_length_errors++;
761 if (vi->mergeable_rx_bufs) {
762 put_page(virt_to_head_page(buf));
763 } else if (vi->big_packets) {
764 give_pages(rq, buf);
765 } else {
766 put_page(virt_to_head_page(buf));
767 }
768 return 0;
769 }
770
771 if (vi->mergeable_rx_bufs)
772 skb = receive_mergeable(dev, vi, rq, buf, ctx, len);
773 else if (vi->big_packets)
774 skb = receive_big(dev, vi, rq, buf, len);
775 else
776 skb = receive_small(dev, vi, rq, buf, len);
777
778 if (unlikely(!skb))
779 return 0;
780
781 hdr = skb_vnet_hdr(skb);
782
783 ret = skb->len;
784
785 if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
786 skb->ip_summed = CHECKSUM_UNNECESSARY;
787
788 if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
789 virtio_is_little_endian(vi->vdev))) {
790 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
791 dev->name, hdr->hdr.gso_type,
792 hdr->hdr.gso_size);
793 goto frame_err;
794 }
795
796 skb->protocol = eth_type_trans(skb, dev);
797 pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
798 ntohs(skb->protocol), skb->len, skb->pkt_type);
799
800 napi_gro_receive(&rq->napi, skb);
801 return ret;
802
803 frame_err:
804 dev->stats.rx_frame_errors++;
805 dev_kfree_skb(skb);
806 return 0;
807 }
808
809 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
810 gfp_t gfp)
811 {
812 struct page_frag *alloc_frag = &rq->alloc_frag;
813 char *buf;
814 unsigned int xdp_headroom = virtnet_get_headroom(vi);
815 int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
816 int err;
817
818 len = SKB_DATA_ALIGN(len) +
819 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
820 if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
821 return -ENOMEM;
822
823 buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
824 get_page(alloc_frag->page);
825 alloc_frag->offset += len;
826 sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom,
827 vi->hdr_len + GOOD_PACKET_LEN);
828 err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
829 if (err < 0)
830 put_page(virt_to_head_page(buf));
831
832 return err;
833 }
834
835 static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
836 gfp_t gfp)
837 {
838 struct page *first, *list = NULL;
839 char *p;
840 int i, err, offset;
841
842 sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
843
844 /* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
845 for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
846 first = get_a_page(rq, gfp);
847 if (!first) {
848 if (list)
849 give_pages(rq, list);
850 return -ENOMEM;
851 }
852 sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
853
854 /* chain new page in list head to match sg */
855 first->private = (unsigned long)list;
856 list = first;
857 }
858
859 first = get_a_page(rq, gfp);
860 if (!first) {
861 give_pages(rq, list);
862 return -ENOMEM;
863 }
864 p = page_address(first);
865
866 /* rq->sg[0], rq->sg[1] share the same page */
867 /* a separate rq->sg[0] for the header - required in case !any_header_sg */
868 sg_set_buf(&rq->sg[0], p, vi->hdr_len);
869
870 /* rq->sg[1] for data packet, from offset */
871 offset = sizeof(struct padded_vnet_hdr);
872 sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
873
874 /* chain first in list head */
875 first->private = (unsigned long)list;
876 err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2,
877 first, gfp);
878 if (err < 0)
879 give_pages(rq, first);
880
881 return err;
882 }
883
884 static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
885 struct ewma_pkt_len *avg_pkt_len)
886 {
887 const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
888 unsigned int len;
889
890 len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
891 rq->min_buf_len, PAGE_SIZE - hdr_len);
892 return ALIGN(len, L1_CACHE_BYTES);
893 }
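/* Worked example (with 4 KiB pages): given the 12-byte mergeable header, a
 * min_buf_len of, say, 1536 and an EWMA average of 1000 bytes, the clamp
 * yields 1536, so len = 12 + 1536 = 1548, which ALIGN() rounds up to 1600 on
 * a system with 64-byte cache lines.
 */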
894
895 static int add_recvbuf_mergeable(struct virtnet_info *vi,
896 struct receive_queue *rq, gfp_t gfp)
897 {
898 struct page_frag *alloc_frag = &rq->alloc_frag;
899 unsigned int headroom = virtnet_get_headroom(vi);
900 char *buf;
901 void *ctx;
902 int err;
903 unsigned int len, hole;
904
905 len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
906 if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
907 return -ENOMEM;
908
909 buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
910 buf += headroom; /* advance address leaving hole at front of pkt */
911 ctx = mergeable_len_to_ctx(len, headroom);
912 get_page(alloc_frag->page);
913 alloc_frag->offset += len + headroom;
914 hole = alloc_frag->size - alloc_frag->offset;
915 if (hole < len + headroom) {
916 /* To avoid internal fragmentation, if there is very likely not
917 * enough space for another buffer, add the remaining space to
918 * the current buffer. This extra space is not included in
919 * the truesize stored in ctx.
920 */
921 len += hole;
922 alloc_frag->offset += hole;
923 }
924
925 sg_init_one(rq->sg, buf, len);
926 err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
927 if (err < 0)
928 put_page(virt_to_head_page(buf));
929
930 return err;
931 }
932
933 /*
934 * Returns false if we couldn't fill entirely (OOM).
935 *
936 * Normally run in the receive path, but can also be run from ndo_open
937 * before we're receiving packets, or from refill_work which is
938 * careful to disable receiving (using napi_disable).
939 */
940 static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
941 gfp_t gfp)
942 {
943 int err;
944 bool oom;
945
946 gfp |= __GFP_COLD;
947 do {
948 if (vi->mergeable_rx_bufs)
949 err = add_recvbuf_mergeable(vi, rq, gfp);
950 else if (vi->big_packets)
951 err = add_recvbuf_big(vi, rq, gfp);
952 else
953 err = add_recvbuf_small(vi, rq, gfp);
954
955 oom = err == -ENOMEM;
956 if (err)
957 break;
958 } while (rq->vq->num_free);
959 virtqueue_kick(rq->vq);
960 return !oom;
961 }
962
963 static void skb_recv_done(struct virtqueue *rvq)
964 {
965 struct virtnet_info *vi = rvq->vdev->priv;
966 struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
967
968 virtqueue_napi_schedule(&rq->napi, rvq);
969 }
970
971 static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
972 {
973 napi_enable(napi);
974
975 /* If all buffers were filled by the other side before napi was enabled, we
976 * won't get another interrupt, so process any outstanding packets now.
977 * Call local_bh_enable after to trigger softIRQ processing.
978 */
979 local_bh_disable();
980 virtqueue_napi_schedule(napi, vq);
981 local_bh_enable();
982 }
983
984 static void virtnet_napi_tx_enable(struct virtnet_info *vi,
985 struct virtqueue *vq,
986 struct napi_struct *napi)
987 {
988 if (!napi->weight)
989 return;
990
991 /* Tx napi touches cachelines on the cpu handling tx interrupts. Only
992 * enable the feature if this is likely affine with the transmit path.
993 */
994 if (!vi->affinity_hint_set) {
995 napi->weight = 0;
996 return;
997 }
998
999 return virtnet_napi_enable(vq, napi);
1000 }
1001
1002 static void virtnet_napi_tx_disable(struct napi_struct *napi)
1003 {
1004 if (napi->weight)
1005 napi_disable(napi);
1006 }
1007
1008 static void refill_work(struct work_struct *work)
1009 {
1010 struct virtnet_info *vi =
1011 container_of(work, struct virtnet_info, refill.work);
1012 bool still_empty;
1013 int i;
1014
1015 for (i = 0; i < vi->curr_queue_pairs; i++) {
1016 struct receive_queue *rq = &vi->rq[i];
1017
1018 napi_disable(&rq->napi);
1019 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
1020 virtnet_napi_enable(rq->vq, &rq->napi);
1021
1022 /* In theory, this can happen: if we don't get any buffers in
1023 * we will *never* try to fill again.
1024 */
1025 if (still_empty)
1026 schedule_delayed_work(&vi->refill, HZ/2);
1027 }
1028 }
1029
1030 static int virtnet_receive(struct receive_queue *rq, int budget)
1031 {
1032 struct virtnet_info *vi = rq->vq->vdev->priv;
1033 unsigned int len, received = 0, bytes = 0;
1034 void *buf;
1035 struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
1036
1037 if (vi->mergeable_rx_bufs) {
1038 void *ctx;
1039
1040 while (received < budget &&
1041 (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
1042 bytes += receive_buf(vi, rq, buf, len, ctx);
1043 received++;
1044 }
1045 } else {
1046 while (received < budget &&
1047 (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
1048 bytes += receive_buf(vi, rq, buf, len, NULL);
1049 received++;
1050 }
1051 }
1052
1053 if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
1054 if (!try_fill_recv(vi, rq, GFP_ATOMIC))
1055 schedule_delayed_work(&vi->refill, 0);
1056 }
1057
1058 u64_stats_update_begin(&stats->rx_syncp);
1059 stats->rx_bytes += bytes;
1060 stats->rx_packets += received;
1061 u64_stats_update_end(&stats->rx_syncp);
1062
1063 return received;
1064 }
1065
1066 static void free_old_xmit_skbs(struct send_queue *sq)
1067 {
1068 struct sk_buff *skb;
1069 unsigned int len;
1070 struct virtnet_info *vi = sq->vq->vdev->priv;
1071 struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
1072 unsigned int packets = 0;
1073 unsigned int bytes = 0;
1074
1075 while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
1076 pr_debug("Sent skb %p\n", skb);
1077
1078 bytes += skb->len;
1079 packets++;
1080
1081 dev_kfree_skb_any(skb);
1082 }
1083
1084 /* Avoid overhead when no packets have been processed; this
1085 * happens when called speculatively from start_xmit.
1086 */
1087 if (!packets)
1088 return;
1089
1090 u64_stats_update_begin(&stats->tx_syncp);
1091 stats->tx_bytes += bytes;
1092 stats->tx_packets += packets;
1093 u64_stats_update_end(&stats->tx_syncp);
1094 }
1095
1096 static void virtnet_poll_cleantx(struct receive_queue *rq)
1097 {
1098 struct virtnet_info *vi = rq->vq->vdev->priv;
1099 unsigned int index = vq2rxq(rq->vq);
1100 struct send_queue *sq = &vi->sq[index];
1101 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
1102
1103 if (!sq->napi.weight)
1104 return;
1105
1106 if (__netif_tx_trylock(txq)) {
1107 free_old_xmit_skbs(sq);
1108 __netif_tx_unlock(txq);
1109 }
1110
1111 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
1112 netif_tx_wake_queue(txq);
1113 }
1114
1115 static int virtnet_poll(struct napi_struct *napi, int budget)
1116 {
1117 struct receive_queue *rq =
1118 container_of(napi, struct receive_queue, napi);
1119 unsigned int received;
1120
1121 virtnet_poll_cleantx(rq);
1122
1123 received = virtnet_receive(rq, budget);
1124
1125 /* Out of packets? */
1126 if (received < budget)
1127 virtqueue_napi_complete(napi, rq->vq, received);
1128
1129 return received;
1130 }
1131
1132 static int virtnet_open(struct net_device *dev)
1133 {
1134 struct virtnet_info *vi = netdev_priv(dev);
1135 int i;
1136
1137 for (i = 0; i < vi->max_queue_pairs; i++) {
1138 if (i < vi->curr_queue_pairs)
1139 /* Make sure we have some buffers: if oom use wq. */
1140 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
1141 schedule_delayed_work(&vi->refill, 0);
1142 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
1143 virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
1144 }
1145
1146 return 0;
1147 }
1148
1149 static int virtnet_poll_tx(struct napi_struct *napi, int budget)
1150 {
1151 struct send_queue *sq = container_of(napi, struct send_queue, napi);
1152 struct virtnet_info *vi = sq->vq->vdev->priv;
1153 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq));
1154
1155 __netif_tx_lock(txq, raw_smp_processor_id());
1156 free_old_xmit_skbs(sq);
1157 __netif_tx_unlock(txq);
1158
1159 virtqueue_napi_complete(napi, sq->vq, 0);
1160
1161 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
1162 netif_tx_wake_queue(txq);
1163
1164 return 0;
1165 }
1166
1167 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
1168 {
1169 struct virtio_net_hdr_mrg_rxbuf *hdr;
1170 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
1171 struct virtnet_info *vi = sq->vq->vdev->priv;
1172 int num_sg;
1173 unsigned hdr_len = vi->hdr_len;
1174 bool can_push;
1175
1176 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
1177
1178 can_push = vi->any_header_sg &&
1179 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
1180 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
1181 /* Even if we can, don't push here yet as this would skew
1182 * csum_start offset below. */
1183 if (can_push)
1184 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
1185 else
1186 hdr = skb_vnet_hdr(skb);
1187
1188 if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
1189 virtio_is_little_endian(vi->vdev), false))
1190 BUG();
1191
1192 if (vi->mergeable_rx_bufs)
1193 hdr->num_buffers = 0;
1194
1195 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
1196 if (can_push) {
1197 __skb_push(skb, hdr_len);
1198 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
1199 if (unlikely(num_sg < 0))
1200 return num_sg;
1201 /* Pull header back to avoid skew in tx bytes calculations. */
1202 __skb_pull(skb, hdr_len);
1203 } else {
1204 sg_set_buf(sq->sg, hdr, hdr_len);
1205 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
1206 if (unlikely(num_sg < 0))
1207 return num_sg;
1208 num_sg++;
1209 }
1210 return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
1211 }
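/* For illustration: when can_push is true the virtio header is written into
 * the skb headroom and the first sg entry covers header + linear data, so a
 * linear skb needs a single descriptor. Otherwise the header lives in skb->cb
 * (via skb_vnet_hdr()) and gets its own sg entry, costing one extra slot.
 */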
1212
1213 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
1214 {
1215 struct virtnet_info *vi = netdev_priv(dev);
1216 int qnum = skb_get_queue_mapping(skb);
1217 struct send_queue *sq = &vi->sq[qnum];
1218 int err;
1219 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
1220 bool kick = !skb->xmit_more;
1221 bool use_napi = sq->napi.weight;
1222
1223 /* Free up any pending old buffers before queueing new ones. */
1224 free_old_xmit_skbs(sq);
1225
1226 if (use_napi && kick)
1227 virtqueue_enable_cb_delayed(sq->vq);
1228
1229 /* timestamp packet in software */
1230 skb_tx_timestamp(skb);
1231
1232 /* Try to transmit */
1233 err = xmit_skb(sq, skb);
1234
1235 /* This should not happen! */
1236 if (unlikely(err)) {
1237 dev->stats.tx_fifo_errors++;
1238 if (net_ratelimit())
1239 dev_warn(&dev->dev,
1240 "Unexpected TXQ (%d) queue failure: %d\n", qnum, err);
1241 dev->stats.tx_dropped++;
1242 dev_kfree_skb_any(skb);
1243 return NETDEV_TX_OK;
1244 }
1245
1246 /* Don't wait up for transmitted skbs to be freed. */
1247 if (!use_napi) {
1248 skb_orphan(skb);
1249 nf_reset(skb);
1250 }
1251
1252 /* If running out of space, stop queue to avoid getting packets that we
1253 * are then unable to transmit.
1254 * An alternative would be to force the queuing layer to requeue the skb by
1255 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
1256 * returned in a normal path of operation: it means that the driver is not
1257 * maintaining the TX queue stop/start state properly, and causes
1258 * the stack to do a non-trivial amount of useless work.
1259 * Since most packets only take 1 or 2 ring slots, stopping the queue
1260 * early means 16 slots are typically wasted.
1261 */
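/* For scale (config-dependent): with 4 KiB pages MAX_SKB_FRAGS is typically
 * 17, so the check below stops the queue once fewer than 2 + 17 = 19
 * descriptors remain free.
 */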
1262 if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
1263 netif_stop_subqueue(dev, qnum);
1264 if (!use_napi &&
1265 unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
1266 /* More just got used, free them then recheck. */
1267 free_old_xmit_skbs(sq);
1268 if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
1269 netif_start_subqueue(dev, qnum);
1270 virtqueue_disable_cb(sq->vq);
1271 }
1272 }
1273 }
1274
1275 if (kick || netif_xmit_stopped(txq))
1276 virtqueue_kick(sq->vq);
1277
1278 return NETDEV_TX_OK;
1279 }
1280
1281 /*
1282 * Send command via the control virtqueue and check status. Commands
1283 * supported by the hypervisor, as indicated by feature bits, should
1284 * never fail unless improperly formatted.
1285 */
1286 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
1287 struct scatterlist *out)
1288 {
1289 struct scatterlist *sgs[4], hdr, stat;
1290 unsigned out_num = 0, tmp;
1291
1292 /* Caller should know better */
1293 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
1294
1295 vi->ctrl_status = ~0;
1296 vi->ctrl_hdr.class = class;
1297 vi->ctrl_hdr.cmd = cmd;
1298 /* Add header */
1299 sg_init_one(&hdr, &vi->ctrl_hdr, sizeof(vi->ctrl_hdr));
1300 sgs[out_num++] = &hdr;
1301
1302 if (out)
1303 sgs[out_num++] = out;
1304
1305 /* Add return status. */
1306 sg_init_one(&stat, &vi->ctrl_status, sizeof(vi->ctrl_status));
1307 sgs[out_num] = &stat;
1308
1309 BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
1310 virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
1311
1312 if (unlikely(!virtqueue_kick(vi->cvq)))
1313 return vi->ctrl_status == VIRTIO_NET_OK;
1314
1315 /* Spin for a response, the kick causes an ioport write, trapping
1316 * into the hypervisor, so the request should be handled immediately.
1317 */
1318 while (!virtqueue_get_buf(vi->cvq, &tmp) &&
1319 !virtqueue_is_broken(vi->cvq))
1320 cpu_relax();
1321
1322 return vi->ctrl_status == VIRTIO_NET_OK;
1323 }
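/* Illustrative layout of a control command as built above: sgs[0] is the
 * 2-byte {class, cmd} header (device-readable), sgs[1] is the optional
 * command-specific payload, and the final entry is the 1-byte ack written
 * back by the device; the call then busy-waits for that ack.
 */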
1324
1325 static int virtnet_set_mac_address(struct net_device *dev, void *p)
1326 {
1327 struct virtnet_info *vi = netdev_priv(dev);
1328 struct virtio_device *vdev = vi->vdev;
1329 int ret;
1330 struct sockaddr *addr;
1331 struct scatterlist sg;
1332
1333 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
1334 if (!addr)
1335 return -ENOMEM;
1336
1337 ret = eth_prepare_mac_addr_change(dev, addr);
1338 if (ret)
1339 goto out;
1340
1341 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1342 sg_init_one(&sg, addr->sa_data, dev->addr_len);
1343 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
1344 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
1345 dev_warn(&vdev->dev,
1346 "Failed to set mac address by vq command.\n");
1347 ret = -EINVAL;
1348 goto out;
1349 }
1350 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
1351 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1352 unsigned int i;
1353
1354 /* Naturally, this has an atomicity problem. */
1355 for (i = 0; i < dev->addr_len; i++)
1356 virtio_cwrite8(vdev,
1357 offsetof(struct virtio_net_config, mac) +
1358 i, addr->sa_data[i]);
1359 }
1360
1361 eth_commit_mac_addr_change(dev, p);
1362 ret = 0;
1363
1364 out:
1365 kfree(addr);
1366 return ret;
1367 }
1368
1369 static void virtnet_stats(struct net_device *dev,
1370 struct rtnl_link_stats64 *tot)
1371 {
1372 struct virtnet_info *vi = netdev_priv(dev);
1373 int cpu;
1374 unsigned int start;
1375
1376 for_each_possible_cpu(cpu) {
1377 struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
1378 u64 tpackets, tbytes, rpackets, rbytes;
1379
1380 do {
1381 start = u64_stats_fetch_begin_irq(&stats->tx_syncp);
1382 tpackets = stats->tx_packets;
1383 tbytes = stats->tx_bytes;
1384 } while (u64_stats_fetch_retry_irq(&stats->tx_syncp, start));
1385
1386 do {
1387 start = u64_stats_fetch_begin_irq(&stats->rx_syncp);
1388 rpackets = stats->rx_packets;
1389 rbytes = stats->rx_bytes;
1390 } while (u64_stats_fetch_retry_irq(&stats->rx_syncp, start));
1391
1392 tot->rx_packets += rpackets;
1393 tot->tx_packets += tpackets;
1394 tot->rx_bytes += rbytes;
1395 tot->tx_bytes += tbytes;
1396 }
1397
1398 tot->tx_dropped = dev->stats.tx_dropped;
1399 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
1400 tot->rx_dropped = dev->stats.rx_dropped;
1401 tot->rx_length_errors = dev->stats.rx_length_errors;
1402 tot->rx_frame_errors = dev->stats.rx_frame_errors;
1403 }
1404
1405 #ifdef CONFIG_NET_POLL_CONTROLLER
1406 static void virtnet_netpoll(struct net_device *dev)
1407 {
1408 struct virtnet_info *vi = netdev_priv(dev);
1409 int i;
1410
1411 for (i = 0; i < vi->curr_queue_pairs; i++)
1412 napi_schedule(&vi->rq[i].napi);
1413 }
1414 #endif
1415
1416 static void virtnet_ack_link_announce(struct virtnet_info *vi)
1417 {
1418 rtnl_lock();
1419 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
1420 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
1421 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
1422 rtnl_unlock();
1423 }
1424
1425 static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
1426 {
1427 struct scatterlist sg;
1428 struct net_device *dev = vi->dev;
1429
1430 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
1431 return 0;
1432
1433 vi->ctrl_mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
1434 sg_init_one(&sg, &vi->ctrl_mq, sizeof(vi->ctrl_mq));
1435
1436 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
1437 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
1438 dev_warn(&dev->dev, "Failed to set num of queue pairs to %d\n",
1439 queue_pairs);
1440 return -EINVAL;
1441 } else {
1442 vi->curr_queue_pairs = queue_pairs;
1443 /* virtnet_open() will refill when the device is brought up. */
1444 if (dev->flags & IFF_UP)
1445 schedule_delayed_work(&vi->refill, 0);
1446 }
1447
1448 return 0;
1449 }
1450
1451 static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
1452 {
1453 int err;
1454
1455 rtnl_lock();
1456 err = _virtnet_set_queues(vi, queue_pairs);
1457 rtnl_unlock();
1458 return err;
1459 }
1460
1461 static int virtnet_close(struct net_device *dev)
1462 {
1463 struct virtnet_info *vi = netdev_priv(dev);
1464 int i;
1465
1466 /* Make sure refill_work doesn't re-enable napi! */
1467 cancel_delayed_work_sync(&vi->refill);
1468
1469 for (i = 0; i < vi->max_queue_pairs; i++) {
1470 napi_disable(&vi->rq[i].napi);
1471 virtnet_napi_tx_disable(&vi->sq[i].napi);
1472 }
1473
1474 return 0;
1475 }
1476
1477 static void virtnet_set_rx_mode(struct net_device *dev)
1478 {
1479 struct virtnet_info *vi = netdev_priv(dev);
1480 struct scatterlist sg[2];
1481 struct virtio_net_ctrl_mac *mac_data;
1482 struct netdev_hw_addr *ha;
1483 int uc_count;
1484 int mc_count;
1485 void *buf;
1486 int i;
1487
1488 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */
1489 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
1490 return;
1491
1492 vi->ctrl_promisc = ((dev->flags & IFF_PROMISC) != 0);
1493 vi->ctrl_allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
1494
1495 sg_init_one(sg, &vi->ctrl_promisc, sizeof(vi->ctrl_promisc));
1496
1497 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
1498 VIRTIO_NET_CTRL_RX_PROMISC, sg))
1499 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
1500 vi->ctrl_promisc ? "en" : "dis");
1501
1502 sg_init_one(sg, &vi->ctrl_allmulti, sizeof(vi->ctrl_allmulti));
1503
1504 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
1505 VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
1506 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
1507 vi->ctrl_allmulti ? "en" : "dis");
1508
1509 uc_count = netdev_uc_count(dev);
1510 mc_count = netdev_mc_count(dev);
1511 /* MAC filter - use one buffer for both lists */
1512 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
1513 (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
1514 mac_data = buf;
1515 if (!buf)
1516 return;
1517
1518 sg_init_table(sg, 2);
1519
1520 /* Store the unicast list and count in the front of the buffer */
1521 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count);
1522 i = 0;
1523 netdev_for_each_uc_addr(ha, dev)
1524 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
1525
1526 sg_set_buf(&sg[0], mac_data,
1527 sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
1528
1529 /* multicast list and count fill the end */
1530 mac_data = (void *)&mac_data->macs[uc_count][0];
1531
1532 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count);
1533 i = 0;
1534 netdev_for_each_mc_addr(ha, dev)
1535 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
1536
1537 sg_set_buf(&sg[1], mac_data,
1538 sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
1539
1540 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
1541 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
1542 dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
1543
1544 kfree(buf);
1545 }
1546
1547 static int virtnet_vlan_rx_add_vid(struct net_device *dev,
1548 __be16 proto, u16 vid)
1549 {
1550 struct virtnet_info *vi = netdev_priv(dev);
1551 struct scatterlist sg;
1552
1553 vi->ctrl_vid = vid;
1554 sg_init_one(&sg, &vi->ctrl_vid, sizeof(vi->ctrl_vid));
1555
1556 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1557 VIRTIO_NET_CTRL_VLAN_ADD, &sg))
1558 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
1559 return 0;
1560 }
1561
1562 static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
1563 __be16 proto, u16 vid)
1564 {
1565 struct virtnet_info *vi = netdev_priv(dev);
1566 struct scatterlist sg;
1567
1568 vi->ctrl_vid = vid;
1569 sg_init_one(&sg, &vi->ctrl_vid, sizeof(vi->ctrl_vid));
1570
1571 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1572 VIRTIO_NET_CTRL_VLAN_DEL, &sg))
1573 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
1574 return 0;
1575 }
1576
1577 static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
1578 {
1579 int i;
1580
1581 if (vi->affinity_hint_set) {
1582 for (i = 0; i < vi->max_queue_pairs; i++) {
1583 virtqueue_set_affinity(vi->rq[i].vq, -1);
1584 virtqueue_set_affinity(vi->sq[i].vq, -1);
1585 }
1586
1587 vi->affinity_hint_set = false;
1588 }
1589 }
1590
1591 static void virtnet_set_affinity(struct virtnet_info *vi)
1592 {
1593 int i;
1594 int cpu;
1595
1596 /* In multiqueue mode, when the number of CPUs is equal to the number of
1597 * queue pairs, we let each queue pair be private to one CPU by
1598 * setting the affinity hint, to eliminate contention.
1599 */
1600 if (vi->curr_queue_pairs == 1 ||
1601 vi->max_queue_pairs != num_online_cpus()) {
1602 virtnet_clean_affinity(vi, -1);
1603 return;
1604 }
1605
1606 i = 0;
1607 for_each_online_cpu(cpu) {
1608 virtqueue_set_affinity(vi->rq[i].vq, cpu);
1609 virtqueue_set_affinity(vi->sq[i].vq, cpu);
1610 netif_set_xps_queue(vi->dev, cpumask_of(cpu), i);
1611 i++;
1612 }
1613
1614 vi->affinity_hint_set = true;
1615 }
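/* For illustration: on a 4-CPU guest with 4 queue pairs, rq[i]/sq[i] get their
 * affinity hint set to CPU i and XPS steers transmits from CPU i to tx queue i.
 * If the counts do not match (or only one pair is in use), the hints are
 * simply cleared above instead.
 */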
1616
1617 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
1618 {
1619 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
1620 node);
1621 virtnet_set_affinity(vi);
1622 return 0;
1623 }
1624
1625 static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
1626 {
1627 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
1628 node_dead);
1629 virtnet_set_affinity(vi);
1630 return 0;
1631 }
1632
1633 static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
1634 {
1635 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
1636 node);
1637
1638 virtnet_clean_affinity(vi, cpu);
1639 return 0;
1640 }
1641
1642 static enum cpuhp_state virtionet_online;
1643
1644 static int virtnet_cpu_notif_add(struct virtnet_info *vi)
1645 {
1646 int ret;
1647
1648 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);
1649 if (ret)
1650 return ret;
1651 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,
1652 &vi->node_dead);
1653 if (!ret)
1654 return ret;
1655 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
1656 return ret;
1657 }
1658
1659 static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
1660 {
1661 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
1662 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD,
1663 &vi->node_dead);
1664 }
1665
1666 static void virtnet_get_ringparam(struct net_device *dev,
1667 struct ethtool_ringparam *ring)
1668 {
1669 struct virtnet_info *vi = netdev_priv(dev);
1670
1671 ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
1672 ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
1673 ring->rx_pending = ring->rx_max_pending;
1674 ring->tx_pending = ring->tx_max_pending;
1675 }
1676
1677
1678 static void virtnet_get_drvinfo(struct net_device *dev,
1679 struct ethtool_drvinfo *info)
1680 {
1681 struct virtnet_info *vi = netdev_priv(dev);
1682 struct virtio_device *vdev = vi->vdev;
1683
1684 strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
1685 strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
1686 strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
1687
1688 }
1689
1690 /* TODO: Eliminate OOO packets during switching */
1691 static int virtnet_set_channels(struct net_device *dev,
1692 struct ethtool_channels *channels)
1693 {
1694 struct virtnet_info *vi = netdev_priv(dev);
1695 u16 queue_pairs = channels->combined_count;
1696 int err;
1697
1698 /* We don't support separate rx/tx channels.
1699 * We don't allow setting 'other' channels.
1700 */
1701 if (channels->rx_count || channels->tx_count || channels->other_count)
1702 return -EINVAL;
1703
1704 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
1705 return -EINVAL;
1706
1707 /* For now we don't support modifying channels while XDP is loaded.
1708 * Also, when XDP is loaded all RX queues have XDP programs, so we only
1709 * need to check a single RX queue.
1710 */
1711 if (vi->rq[0].xdp_prog)
1712 return -EINVAL;
1713
1714 get_online_cpus();
1715 err = _virtnet_set_queues(vi, queue_pairs);
1716 if (!err) {
1717 netif_set_real_num_tx_queues(dev, queue_pairs);
1718 netif_set_real_num_rx_queues(dev, queue_pairs);
1719
1720 virtnet_set_affinity(vi);
1721 }
1722 put_online_cpus();
1723
1724 return err;
1725 }
1726
1727 static void virtnet_get_channels(struct net_device *dev,
1728 struct ethtool_channels *channels)
1729 {
1730 struct virtnet_info *vi = netdev_priv(dev);
1731
1732 channels->combined_count = vi->curr_queue_pairs;
1733 channels->max_combined = vi->max_queue_pairs;
1734 channels->max_other = 0;
1735 channels->rx_count = 0;
1736 channels->tx_count = 0;
1737 channels->other_count = 0;
1738 }
1739
1740 /* Check if the user is trying to change anything besides speed/duplex */
1741 static bool
1742 virtnet_validate_ethtool_cmd(const struct ethtool_link_ksettings *cmd)
1743 {
1744 struct ethtool_link_ksettings diff1 = *cmd;
1745 struct ethtool_link_ksettings diff2 = {};
1746
1747 /* cmd is always set, so we need to clear it; validate the port type,
1748 * and since there is no autonegotiation we can ignore advertising
1749 */
1750 diff1.base.speed = 0;
1751 diff2.base.port = PORT_OTHER;
1752 ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
1753 diff1.base.duplex = 0;
1754 diff1.base.cmd = 0;
1755 diff1.base.link_mode_masks_nwords = 0;
1756
1757 return !memcmp(&diff1.base, &diff2.base, sizeof(diff1.base)) &&
1758 bitmap_empty(diff1.link_modes.supported,
1759 __ETHTOOL_LINK_MODE_MASK_NBITS) &&
1760 bitmap_empty(diff1.link_modes.advertising,
1761 __ETHTOOL_LINK_MODE_MASK_NBITS) &&
1762 bitmap_empty(diff1.link_modes.lp_advertising,
1763 __ETHTOOL_LINK_MODE_MASK_NBITS);
1764 }
1765
1766 static int virtnet_set_link_ksettings(struct net_device *dev,
1767 const struct ethtool_link_ksettings *cmd)
1768 {
1769 struct virtnet_info *vi = netdev_priv(dev);
1770 u32 speed;
1771
1772 speed = cmd->base.speed;
1773 /* don't allow custom speed and duplex */
1774 if (!ethtool_validate_speed(speed) ||
1775 !ethtool_validate_duplex(cmd->base.duplex) ||
1776 !virtnet_validate_ethtool_cmd(cmd))
1777 return -EINVAL;
1778 vi->speed = speed;
1779 vi->duplex = cmd->base.duplex;
1780
1781 return 0;
1782 }
1783
1784 static int virtnet_get_link_ksettings(struct net_device *dev,
1785 struct ethtool_link_ksettings *cmd)
1786 {
1787 struct virtnet_info *vi = netdev_priv(dev);
1788
1789 cmd->base.speed = vi->speed;
1790 cmd->base.duplex = vi->duplex;
1791 cmd->base.port = PORT_OTHER;
1792
1793 return 0;
1794 }
1795
1796 static void virtnet_init_settings(struct net_device *dev)
1797 {
1798 struct virtnet_info *vi = netdev_priv(dev);
1799
1800 vi->speed = SPEED_UNKNOWN;
1801 vi->duplex = DUPLEX_UNKNOWN;
1802 }
1803
1804 static const struct ethtool_ops virtnet_ethtool_ops = {
1805 .get_drvinfo = virtnet_get_drvinfo,
1806 .get_link = ethtool_op_get_link,
1807 .get_ringparam = virtnet_get_ringparam,
1808 .set_channels = virtnet_set_channels,
1809 .get_channels = virtnet_get_channels,
1810 .get_ts_info = ethtool_op_get_ts_info,
1811 .get_link_ksettings = virtnet_get_link_ksettings,
1812 .set_link_ksettings = virtnet_set_link_ksettings,
1813 };
1814
1815 static void virtnet_freeze_down(struct virtio_device *vdev)
1816 {
1817 struct virtnet_info *vi = vdev->priv;
1818 int i;
1819
1820 /* Make sure no work handler is accessing the device */
1821 flush_work(&vi->config_work);
1822
1823 netif_device_detach(vi->dev);
1824 netif_tx_disable(vi->dev);
1825 cancel_delayed_work_sync(&vi->refill);
1826
1827 if (netif_running(vi->dev)) {
1828 for (i = 0; i < vi->max_queue_pairs; i++) {
1829 napi_disable(&vi->rq[i].napi);
1830 virtnet_napi_tx_disable(&vi->sq[i].napi);
1831 }
1832 }
1833 }
1834
1835 static int init_vqs(struct virtnet_info *vi);
1836 static void _remove_vq_common(struct virtnet_info *vi);
1837
1838 static int virtnet_restore_up(struct virtio_device *vdev)
1839 {
1840 struct virtnet_info *vi = vdev->priv;
1841 int err, i;
1842
1843 err = init_vqs(vi);
1844 if (err)
1845 return err;
1846
1847 virtio_device_ready(vdev);
1848
1849 if (netif_running(vi->dev)) {
1850 for (i = 0; i < vi->curr_queue_pairs; i++)
1851 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
1852 schedule_delayed_work(&vi->refill, 0);
1853
1854 for (i = 0; i < vi->max_queue_pairs; i++) {
1855 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
1856 virtnet_napi_tx_enable(vi, vi->sq[i].vq,
1857 &vi->sq[i].napi);
1858 }
1859 }
1860
1861 netif_device_attach(vi->dev);
1862 return err;
1863 }
1864
1865 static int virtnet_reset(struct virtnet_info *vi, int curr_qp, int xdp_qp)
1866 {
1867 struct virtio_device *dev = vi->vdev;
1868 int ret;
1869
1870 virtio_config_disable(dev);
1871 dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
1872 virtnet_freeze_down(dev);
1873 _remove_vq_common(vi);
1874
1875 virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
1876 virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
1877
1878 ret = virtio_finalize_features(dev);
1879 if (ret)
1880 goto err;
1881
1882 vi->xdp_queue_pairs = xdp_qp;
1883 ret = virtnet_restore_up(dev);
1884 if (ret)
1885 goto err;
1886 ret = _virtnet_set_queues(vi, curr_qp);
1887 if (ret)
1888 goto err;
1889
1890 virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
1891 virtio_config_enable(dev);
1892 return 0;
1893 err:
1894 virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
1895 return ret;
1896 }
1897
1898 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
1899 struct netlink_ext_ack *extack)
1900 {
1901 unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
1902 struct virtnet_info *vi = netdev_priv(dev);
1903 struct bpf_prog *old_prog;
1904 u16 xdp_qp = 0, curr_qp;
1905 int i, err;
1906
1907 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
1908 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
1909 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
1910 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO)) {
1911 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first");
1912 return -EOPNOTSUPP;
1913 }
1914
1915 if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
1916 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
1917 return -EINVAL;
1918 }
1919
1920 if (dev->mtu > max_sz) {
1921 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
1922 netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
1923 return -EINVAL;
1924 }
1925
1926 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
1927 if (prog)
1928 xdp_qp = nr_cpu_ids;
1929
1930 /* XDP requires extra queues for XDP_TX */
1931 if (curr_qp + xdp_qp > vi->max_queue_pairs) {
1932 NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
1933 netdev_warn(dev, "requested %i queues but max is %i\n",
1934 curr_qp + xdp_qp, vi->max_queue_pairs);
1935 return -ENOMEM;
1936 }
1937
1938 if (prog) {
1939 prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
1940 if (IS_ERR(prog))
1941 return PTR_ERR(prog);
1942 }
1943
1944 /* Changing the headroom in buffers is a disruptive operation because
1945 * existing buffers must be flushed and reallocated. This will happen
1946 * when an XDP program is first added, or when XDP is disabled by removing
1947 * the program, resulting in the number of XDP queue pairs changing.
1948 */
1949 if (vi->xdp_queue_pairs != xdp_qp) {
1950 err = virtnet_reset(vi, curr_qp + xdp_qp, xdp_qp);
1951 if (err) {
1952 dev_warn(&dev->dev, "XDP reset failure.\n");
1953 goto virtio_reset_err;
1954 }
1955 }
1956
1957 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
1958
1959 for (i = 0; i < vi->max_queue_pairs; i++) {
1960 old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
1961 rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
1962 if (old_prog)
1963 bpf_prog_put(old_prog);
1964 }
1965
1966 return 0;
1967
1968 virtio_reset_err:
1969 /* On reset error, do our best to unwind the in-flight XDP changes and
1970 * return the error up to user space for resolution. The underlying reset
1971 * hung on us, so there is not much we can do here.
1972 */
1973 if (prog)
1974 bpf_prog_sub(prog, vi->max_queue_pairs - 1);
1975 return err;
1976 }
1977
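/* Report the id of the attached XDP program, if any. All receive queues
 * share the same program, so the first non-NULL pointer is sufficient.
 */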
1978 static u32 virtnet_xdp_query(struct net_device *dev)
1979 {
1980 struct virtnet_info *vi = netdev_priv(dev);
1981 const struct bpf_prog *xdp_prog;
1982 int i;
1983
1984 for (i = 0; i < vi->max_queue_pairs; i++) {
1985 xdp_prog = rtnl_dereference(vi->rq[i].xdp_prog);
1986 if (xdp_prog)
1987 return xdp_prog->aux->id;
1988 }
1989 return 0;
1990 }
1991
1992 static int virtnet_xdp(struct net_device *dev, struct netdev_xdp *xdp)
1993 {
1994 switch (xdp->command) {
1995 case XDP_SETUP_PROG:
1996 return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
1997 case XDP_QUERY_PROG:
1998 xdp->prog_id = virtnet_xdp_query(dev);
1999 xdp->prog_attached = !!xdp->prog_id;
2000 return 0;
2001 default:
2002 return -EINVAL;
2003 }
2004 }
2005
2006 static const struct net_device_ops virtnet_netdev = {
2007 .ndo_open = virtnet_open,
2008 .ndo_stop = virtnet_close,
2009 .ndo_start_xmit = start_xmit,
2010 .ndo_validate_addr = eth_validate_addr,
2011 .ndo_set_mac_address = virtnet_set_mac_address,
2012 .ndo_set_rx_mode = virtnet_set_rx_mode,
2013 .ndo_get_stats64 = virtnet_stats,
2014 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
2015 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
2016 #ifdef CONFIG_NET_POLL_CONTROLLER
2017 .ndo_poll_controller = virtnet_netpoll,
2018 #endif
2019 .ndo_xdp = virtnet_xdp,
2020 .ndo_features_check = passthru_features_check,
2021 };
2022
2023 static void virtnet_config_changed_work(struct work_struct *work)
2024 {
2025 struct virtnet_info *vi =
2026 container_of(work, struct virtnet_info, config_work);
2027 u16 v;
2028
2029 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
2030 struct virtio_net_config, status, &v) < 0)
2031 return;
2032
2033 if (v & VIRTIO_NET_S_ANNOUNCE) {
2034 netdev_notify_peers(vi->dev);
2035 virtnet_ack_link_announce(vi);
2036 }
2037
2038 /* Ignore unknown (future) status bits */
2039 v &= VIRTIO_NET_S_LINK_UP;
2040
2041 if (vi->status == v)
2042 return;
2043
2044 vi->status = v;
2045
2046 if (vi->status & VIRTIO_NET_S_LINK_UP) {
2047 netif_carrier_on(vi->dev);
2048 netif_tx_wake_all_queues(vi->dev);
2049 } else {
2050 netif_carrier_off(vi->dev);
2051 netif_tx_stop_all_queues(vi->dev);
2052 }
2053 }
2054
2055 static void virtnet_config_changed(struct virtio_device *vdev)
2056 {
2057 struct virtnet_info *vi = vdev->priv;
2058
2059 schedule_work(&vi->config_work);
2060 }
2061
2062 static void virtnet_free_queues(struct virtnet_info *vi)
2063 {
2064 int i;
2065
2066 for (i = 0; i < vi->max_queue_pairs; i++) {
2067 napi_hash_del(&vi->rq[i].napi);
2068 netif_napi_del(&vi->rq[i].napi);
2069 netif_napi_del(&vi->sq[i].napi);
2070 }
2071
2072 /* We called napi_hash_del() before netif_napi_del(),
2073 * so we need to respect an RCU grace period before freeing vi->rq
2074 */
2075 synchronize_net();
2076
2077 kfree(vi->rq);
2078 kfree(vi->sq);
2079 }
2080
2081 static void _free_receive_bufs(struct virtnet_info *vi)
2082 {
2083 struct bpf_prog *old_prog;
2084 int i;
2085
2086 for (i = 0; i < vi->max_queue_pairs; i++) {
2087 while (vi->rq[i].pages)
2088 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
2089
2090 old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
2091 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
2092 if (old_prog)
2093 bpf_prog_put(old_prog);
2094 }
2095 }
2096
2097 static void free_receive_bufs(struct virtnet_info *vi)
2098 {
2099 rtnl_lock();
2100 _free_receive_bufs(vi);
2101 rtnl_unlock();
2102 }
2103
2104 static void free_receive_page_frags(struct virtnet_info *vi)
2105 {
2106 int i;
2107 for (i = 0; i < vi->max_queue_pairs; i++)
2108 if (vi->rq[i].alloc_frag.page)
2109 put_page(vi->rq[i].alloc_frag.page);
2110 }
2111
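/* TX queues in the range [curr_queue_pairs - xdp_queue_pairs,
 * curr_queue_pairs) carry raw XDP_TX buffers instead of skbs, so unused
 * buffers on them must be freed as pages rather than via dev_kfree_skb().
 */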
2112 static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
2113 {
2114 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
2115 return false;
2116 else if (q < vi->curr_queue_pairs)
2117 return true;
2118 else
2119 return false;
2120 }
2121
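/* After the device has been reset, detach and free any buffers still
 * sitting in the send and receive virtqueues.
 */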
2122 static void free_unused_bufs(struct virtnet_info *vi)
2123 {
2124 void *buf;
2125 int i;
2126
2127 for (i = 0; i < vi->max_queue_pairs; i++) {
2128 struct virtqueue *vq = vi->sq[i].vq;
2129 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
2130 if (!is_xdp_raw_buffer_queue(vi, i))
2131 dev_kfree_skb(buf);
2132 else
2133 put_page(virt_to_head_page(buf));
2134 }
2135 }
2136
2137 for (i = 0; i < vi->max_queue_pairs; i++) {
2138 struct virtqueue *vq = vi->rq[i].vq;
2139
2140 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
2141 if (vi->mergeable_rx_bufs) {
2142 put_page(virt_to_head_page(buf));
2143 } else if (vi->big_packets) {
2144 give_pages(&vi->rq[i], buf);
2145 } else {
2146 put_page(virt_to_head_page(buf));
2147 }
2148 }
2149 }
2150 }
2151
2152 static void virtnet_del_vqs(struct virtnet_info *vi)
2153 {
2154 struct virtio_device *vdev = vi->vdev;
2155
2156 virtnet_clean_affinity(vi, -1);
2157
2158 vdev->config->del_vqs(vdev);
2159
2160 virtnet_free_queues(vi);
2161 }
2162
2163 /* How large should a single buffer be so a queue full of these can fit at
2164 * least one full packet?
2165 * Logic below assumes the mergeable buffer header is used.
2166 */
2167 static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
2168 {
2169 const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2170 unsigned int rq_size = virtqueue_get_vring_size(vq);
2171 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
2172 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
2173 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
2174
2175 return max(max(min_buf_len, hdr_len) - hdr_len,
2176 (unsigned int)GOOD_PACKET_LEN);
2177 }
2178
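/* Create the device's virtqueues: one RX/TX pair per queue pair plus an
 * optional control vq. Callbacks and names are set up per queue, and
 * per-buffer context is requested for the RX queues when mergeable
 * receive buffers are in use.
 */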
2179 static int virtnet_find_vqs(struct virtnet_info *vi)
2180 {
2181 vq_callback_t **callbacks;
2182 struct virtqueue **vqs;
2183 int ret = -ENOMEM;
2184 int i, total_vqs;
2185 const char **names;
2186 bool *ctx;
2187
2188 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
2189 * a possible N-1 further RX/TX queue pairs used in multiqueue mode,
2190 * followed by a possible control vq.
2191 */
2192 total_vqs = vi->max_queue_pairs * 2 +
2193 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
2194
2195 /* Allocate space for find_vqs parameters */
2196 vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
2197 if (!vqs)
2198 goto err_vq;
2199 callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
2200 if (!callbacks)
2201 goto err_callback;
2202 names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
2203 if (!names)
2204 goto err_names;
2205 if (vi->mergeable_rx_bufs) {
2206 ctx = kzalloc(total_vqs * sizeof(*ctx), GFP_KERNEL);
2207 if (!ctx)
2208 goto err_ctx;
2209 } else {
2210 ctx = NULL;
2211 }
2212
2213 /* Parameters for control virtqueue, if any */
2214 if (vi->has_cvq) {
2215 callbacks[total_vqs - 1] = NULL;
2216 names[total_vqs - 1] = "control";
2217 }
2218
2219 /* Allocate/initialize parameters for send/receive virtqueues */
2220 for (i = 0; i < vi->max_queue_pairs; i++) {
2221 callbacks[rxq2vq(i)] = skb_recv_done;
2222 callbacks[txq2vq(i)] = skb_xmit_done;
2223 sprintf(vi->rq[i].name, "input.%d", i);
2224 sprintf(vi->sq[i].name, "output.%d", i);
2225 names[rxq2vq(i)] = vi->rq[i].name;
2226 names[txq2vq(i)] = vi->sq[i].name;
2227 if (ctx)
2228 ctx[rxq2vq(i)] = true;
2229 }
2230
2231 ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
2232 names, ctx, NULL);
2233 if (ret)
2234 goto err_find;
2235
2236 if (vi->has_cvq) {
2237 vi->cvq = vqs[total_vqs - 1];
2238 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
2239 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
2240 }
2241
2242 for (i = 0; i < vi->max_queue_pairs; i++) {
2243 vi->rq[i].vq = vqs[rxq2vq(i)];
2244 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
2245 vi->sq[i].vq = vqs[txq2vq(i)];
2246 }
2247
2248 kfree(names);
2249 kfree(callbacks);
2250 kfree(vqs);
2251 kfree(ctx);
2252
2253 return 0;
2254
2255 err_find:
2256 kfree(ctx);
2257 err_ctx:
2258 kfree(names);
2259 err_names:
2260 kfree(callbacks);
2261 err_callback:
2262 kfree(vqs);
2263 err_vq:
2264 return ret;
2265 }
2266
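/* Allocate the per-queue-pair send_queue/receive_queue structures and
 * initialize their NAPI contexts, scatterlists and packet-length EWMAs.
 */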
2267 static int virtnet_alloc_queues(struct virtnet_info *vi)
2268 {
2269 int i;
2270
2271 vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL);
2272 if (!vi->sq)
2273 goto err_sq;
2274 vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL);
2275 if (!vi->rq)
2276 goto err_rq;
2277
2278 INIT_DELAYED_WORK(&vi->refill, refill_work);
2279 for (i = 0; i < vi->max_queue_pairs; i++) {
2280 vi->rq[i].pages = NULL;
2281 netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
2282 napi_weight);
2283 netif_tx_napi_add(vi->dev, &vi->sq[i].napi, virtnet_poll_tx,
2284 napi_tx ? napi_weight : 0);
2285
2286 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
2287 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
2288 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
2289 }
2290
2291 return 0;
2292
2293 err_rq:
2294 kfree(vi->sq);
2295 err_sq:
2296 return -ENOMEM;
2297 }
2298
2299 static int init_vqs(struct virtnet_info *vi)
2300 {
2301 int ret;
2302
2303 /* Allocate send & receive queues */
2304 ret = virtnet_alloc_queues(vi);
2305 if (ret)
2306 goto err;
2307
2308 ret = virtnet_find_vqs(vi);
2309 if (ret)
2310 goto err_free;
2311
2312 get_online_cpus();
2313 virtnet_set_affinity(vi);
2314 put_online_cpus();
2315
2316 return 0;
2317
2318 err_free:
2319 virtnet_free_queues(vi);
2320 err:
2321 return ret;
2322 }
2323
2324 #ifdef CONFIG_SYSFS
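/* Per-RX-queue sysfs attribute exposing the buffer size currently being
 * allocated for mergeable receive buffers, derived from the EWMA of
 * recent packet lengths.
 */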
2325 static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
2326 struct rx_queue_attribute *attribute, char *buf)
2327 {
2328 struct virtnet_info *vi = netdev_priv(queue->dev);
2329 unsigned int queue_index = get_netdev_rx_queue_index(queue);
2330 struct ewma_pkt_len *avg;
2331
2332 BUG_ON(queue_index >= vi->max_queue_pairs);
2333 avg = &vi->rq[queue_index].mrg_avg_pkt_len;
2334 return sprintf(buf, "%u\n",
2335 get_mergeable_buf_len(&vi->rq[queue_index], avg));
2336 }
2337
2338 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
2339 __ATTR_RO(mergeable_rx_buffer_size);
2340
2341 static struct attribute *virtio_net_mrg_rx_attrs[] = {
2342 &mergeable_rx_buffer_size_attribute.attr,
2343 NULL
2344 };
2345
2346 static const struct attribute_group virtio_net_mrg_rx_group = {
2347 .name = "virtio_net",
2348 .attrs = virtio_net_mrg_rx_attrs
2349 };
2350 #endif
2351
2352 static bool virtnet_fail_on_feature(struct virtio_device *vdev,
2353 unsigned int fbit,
2354 const char *fname, const char *dname)
2355 {
2356 if (!virtio_has_feature(vdev, fbit))
2357 return false;
2358
2359 dev_err(&vdev->dev, "device advertises feature %s but not %s",
2360 fname, dname);
2361
2362 return true;
2363 }
2364
2365 #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \
2366 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)
2367
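/* All of the control-plane features depend on VIRTIO_NET_F_CTRL_VQ;
 * refuse to drive a device that advertises any of them without it.
 */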
2368 static bool virtnet_validate_features(struct virtio_device *vdev)
2369 {
2370 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
2371 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
2372 "VIRTIO_NET_F_CTRL_VQ") ||
2373 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
2374 "VIRTIO_NET_F_CTRL_VQ") ||
2375 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
2376 "VIRTIO_NET_F_CTRL_VQ") ||
2377 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
2378 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
2379 "VIRTIO_NET_F_CTRL_VQ"))) {
2380 return false;
2381 }
2382
2383 return true;
2384 }
2385
2386 #define MIN_MTU ETH_MIN_MTU
2387 #define MAX_MTU ETH_MAX_MTU
2388
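/* Early validation, called before features are finalized: bail out if
 * config space access is unavailable or the feature set is inconsistent,
 * and clear VIRTIO_NET_F_MTU if the advertised MTU is below the minimum
 * rather than failing the probe later.
 */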
2389 static int virtnet_validate(struct virtio_device *vdev)
2390 {
2391 if (!vdev->config->get) {
2392 dev_err(&vdev->dev, "%s failure: config access disabled\n",
2393 __func__);
2394 return -EINVAL;
2395 }
2396
2397 if (!virtnet_validate_features(vdev))
2398 return -EINVAL;
2399
2400 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
2401 int mtu = virtio_cread16(vdev,
2402 offsetof(struct virtio_net_config,
2403 mtu));
2404 if (mtu < MIN_MTU)
2405 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
2406 }
2407
2408 return 0;
2409 }
2410
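/* Probe: translate negotiated features into netdev offload flags, read
 * the MAC and MTU from config space, allocate per-CPU stats and the
 * virtqueues, then register the netdev and pick up the initial link state
 * from the device.
 */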
2411 static int virtnet_probe(struct virtio_device *vdev)
2412 {
2413 int i, err;
2414 struct net_device *dev;
2415 struct virtnet_info *vi;
2416 u16 max_queue_pairs;
2417 int mtu;
2418
2419 /* Find out whether the host supports a multiqueue virtio_net device */
2420 err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
2421 struct virtio_net_config,
2422 max_virtqueue_pairs, &max_queue_pairs);
2423
2424 /* We need at least 2 queues */
2425 if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
2426 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
2427 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
2428 max_queue_pairs = 1;
2429
2430 /* Allocate ourselves a network device with room for our info */
2431 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
2432 if (!dev)
2433 return -ENOMEM;
2434
2435 /* Set up network device as normal. */
2436 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
2437 dev->netdev_ops = &virtnet_netdev;
2438 dev->features = NETIF_F_HIGHDMA;
2439
2440 dev->ethtool_ops = &virtnet_ethtool_ops;
2441 SET_NETDEV_DEV(dev, &vdev->dev);
2442
2443 /* Do we support "hardware" checksums? */
2444 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
2445 /* This opens up the world of extra features. */
2446 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
2447 if (csum)
2448 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
2449
2450 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
2451 dev->hw_features |= NETIF_F_TSO
2452 | NETIF_F_TSO_ECN | NETIF_F_TSO6;
2453 }
2454 /* Individual feature bits: what can the host handle? */
2455 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
2456 dev->hw_features |= NETIF_F_TSO;
2457 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
2458 dev->hw_features |= NETIF_F_TSO6;
2459 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
2460 dev->hw_features |= NETIF_F_TSO_ECN;
2461
2462 dev->features |= NETIF_F_GSO_ROBUST;
2463
2464 if (gso)
2465 dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
2466 /* (!csum && gso) case will be fixed by register_netdev() */
2467 }
2468 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
2469 dev->features |= NETIF_F_RXCSUM;
2470
2471 dev->vlan_features = dev->features;
2472
2473 /* MTU range: 68 - 65535 */
2474 dev->min_mtu = MIN_MTU;
2475 dev->max_mtu = MAX_MTU;
2476
2477 /* Configuration may specify what MAC to use. Otherwise random. */
2478 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
2479 virtio_cread_bytes(vdev,
2480 offsetof(struct virtio_net_config, mac),
2481 dev->dev_addr, dev->addr_len);
2482 else
2483 eth_hw_addr_random(dev);
2484
2485 /* Set up our device-specific information */
2486 vi = netdev_priv(dev);
2487 vi->dev = dev;
2488 vi->vdev = vdev;
2489 vdev->priv = vi;
2490 vi->stats = alloc_percpu(struct virtnet_stats);
2491 err = -ENOMEM;
2492 if (vi->stats == NULL)
2493 goto free;
2494
2495 for_each_possible_cpu(i) {
2496 struct virtnet_stats *virtnet_stats;
2497 virtnet_stats = per_cpu_ptr(vi->stats, i);
2498 u64_stats_init(&virtnet_stats->tx_syncp);
2499 u64_stats_init(&virtnet_stats->rx_syncp);
2500 }
2501
2502 INIT_WORK(&vi->config_work, virtnet_config_changed_work);
2503
2504 /* If we can receive ANY GSO packets, we must allocate large ones. */
2505 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
2506 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
2507 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) ||
2508 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO))
2509 vi->big_packets = true;
2510
2511 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
2512 vi->mergeable_rx_bufs = true;
2513
2514 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
2515 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
2516 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2517 else
2518 vi->hdr_len = sizeof(struct virtio_net_hdr);
2519
2520 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
2521 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
2522 vi->any_header_sg = true;
2523
2524 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
2525 vi->has_cvq = true;
2526
2527 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
2528 mtu = virtio_cread16(vdev,
2529 offsetof(struct virtio_net_config,
2530 mtu));
2531 if (mtu < dev->min_mtu) {
2532 /* Should never trigger: MTU was previously validated
2533 * in virtnet_validate.
2534 */
2535 dev_err(&vdev->dev, "device MTU appears to have changed, "
2536 "it is now %d < %d", mtu, dev->min_mtu);
2537 goto free_stats;
2538 }
2539
2540 dev->mtu = mtu;
2541 dev->max_mtu = mtu;
2542
2543 /* TODO: size buffers correctly in this case. */
2544 if (dev->mtu > ETH_DATA_LEN)
2545 vi->big_packets = true;
2546 }
2547
2548 if (vi->any_header_sg)
2549 dev->needed_headroom = vi->hdr_len;
2550
2551 /* Enable multiqueue by default */
2552 if (num_online_cpus() >= max_queue_pairs)
2553 vi->curr_queue_pairs = max_queue_pairs;
2554 else
2555 vi->curr_queue_pairs = num_online_cpus();
2556 vi->max_queue_pairs = max_queue_pairs;
2557
2558 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
2559 err = init_vqs(vi);
2560 if (err)
2561 goto free_stats;
2562
2563 #ifdef CONFIG_SYSFS
2564 if (vi->mergeable_rx_bufs)
2565 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
2566 #endif
2567 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
2568 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
2569
2570 virtnet_init_settings(dev);
2571
2572 err = register_netdev(dev);
2573 if (err) {
2574 pr_debug("virtio_net: registering device failed\n");
2575 goto free_vqs;
2576 }
2577
2578 virtio_device_ready(vdev);
2579
2580 err = virtnet_cpu_notif_add(vi);
2581 if (err) {
2582 pr_debug("virtio_net: registering cpu notifier failed\n");
2583 goto free_unregister_netdev;
2584 }
2585
2586 virtnet_set_queues(vi, vi->curr_queue_pairs);
2587
2588 /* Assume link up if device can't report link status,
2589 * otherwise get link status from config. */
2590 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
2591 netif_carrier_off(dev);
2592 schedule_work(&vi->config_work);
2593 } else {
2594 vi->status = VIRTIO_NET_S_LINK_UP;
2595 netif_carrier_on(dev);
2596 }
2597
2598 pr_debug("virtnet: registered device %s with %d RX and TX vqs\n",
2599 dev->name, max_queue_pairs);
2600
2601 return 0;
2602
2603 free_unregister_netdev:
2604 vi->vdev->config->reset(vdev);
2605
2606 unregister_netdev(dev);
2607 free_vqs:
2608 cancel_delayed_work_sync(&vi->refill);
2609 free_receive_page_frags(vi);
2610 virtnet_del_vqs(vi);
2611 free_stats:
2612 free_percpu(vi->stats);
2613 free:
2614 free_netdev(dev);
2615 return err;
2616 }
2617
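/* Reset the device so the host stops using the rings, then reclaim all
 * buffers and tear down the virtqueues. Callers must hold the RTNL lock,
 * which _free_receive_bufs() relies on for the XDP program pointer.
 */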
2618 static void _remove_vq_common(struct virtnet_info *vi)
2619 {
2620 vi->vdev->config->reset(vi->vdev);
2621 free_unused_bufs(vi);
2622 _free_receive_bufs(vi);
2623 free_receive_page_frags(vi);
2624 virtnet_del_vqs(vi);
2625 }
2626
2627 static void remove_vq_common(struct virtnet_info *vi)
2628 {
2629 vi->vdev->config->reset(vi->vdev);
2630
2631 /* Free unused buffers in both send and recv, if any. */
2632 free_unused_bufs(vi);
2633
2634 free_receive_bufs(vi);
2635
2636 free_receive_page_frags(vi);
2637
2638 virtnet_del_vqs(vi);
2639 }
2640
2641 static void virtnet_remove(struct virtio_device *vdev)
2642 {
2643 struct virtnet_info *vi = vdev->priv;
2644
2645 virtnet_cpu_notif_remove(vi);
2646
2647 /* Make sure no work handler is accessing the device. */
2648 flush_work(&vi->config_work);
2649
2650 unregister_netdev(vi->dev);
2651
2652 remove_vq_common(vi);
2653
2654 free_percpu(vi->stats);
2655 free_netdev(vi->dev);
2656 }
2657
2658 #ifdef CONFIG_PM_SLEEP
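/* Suspend/resume hooks: tear everything down on freeze and rebuild the
 * virtqueues on restore, reusing the same helpers as the XDP reset path.
 */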
2659 static int virtnet_freeze(struct virtio_device *vdev)
2660 {
2661 struct virtnet_info *vi = vdev->priv;
2662
2663 virtnet_cpu_notif_remove(vi);
2664 virtnet_freeze_down(vdev);
2665 remove_vq_common(vi);
2666
2667 return 0;
2668 }
2669
2670 static int virtnet_restore(struct virtio_device *vdev)
2671 {
2672 struct virtnet_info *vi = vdev->priv;
2673 int err;
2674
2675 err = virtnet_restore_up(vdev);
2676 if (err)
2677 return err;
2678 virtnet_set_queues(vi, vi->curr_queue_pairs);
2679
2680 err = virtnet_cpu_notif_add(vi);
2681 if (err)
2682 return err;
2683
2684 return 0;
2685 }
2686 #endif
2687
2688 static struct virtio_device_id id_table[] = {
2689 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
2690 { 0 },
2691 };
2692
2693 #define VIRTNET_FEATURES \
2694 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
2695 VIRTIO_NET_F_MAC, \
2696 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
2697 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
2698 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
2699 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
2700 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
2701 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
2702 VIRTIO_NET_F_CTRL_MAC_ADDR, \
2703 VIRTIO_NET_F_MTU
2704
2705 static unsigned int features[] = {
2706 VIRTNET_FEATURES,
2707 };
2708
2709 static unsigned int features_legacy[] = {
2710 VIRTNET_FEATURES,
2711 VIRTIO_NET_F_GSO,
2712 VIRTIO_F_ANY_LAYOUT,
2713 };
2714
2715 static struct virtio_driver virtio_net_driver = {
2716 .feature_table = features,
2717 .feature_table_size = ARRAY_SIZE(features),
2718 .feature_table_legacy = features_legacy,
2719 .feature_table_size_legacy = ARRAY_SIZE(features_legacy),
2720 .driver.name = KBUILD_MODNAME,
2721 .driver.owner = THIS_MODULE,
2722 .id_table = id_table,
2723 .validate = virtnet_validate,
2724 .probe = virtnet_probe,
2725 .remove = virtnet_remove,
2726 .config_changed = virtnet_config_changed,
2727 #ifdef CONFIG_PM_SLEEP
2728 .freeze = virtnet_freeze,
2729 .restore = virtnet_restore,
2730 #endif
2731 };
2732
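/* Register the CPU hotplug callbacks used for queue/interrupt affinity
 * before registering the virtio driver itself, and unwind in reverse
 * order on failure.
 */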
2733 static __init int virtio_net_driver_init(void)
2734 {
2735 int ret;
2736
2737 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
2738 virtnet_cpu_online,
2739 virtnet_cpu_down_prep);
2740 if (ret < 0)
2741 goto out;
2742 virtionet_online = ret;
2743 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
2744 NULL, virtnet_cpu_dead);
2745 if (ret)
2746 goto err_dead;
2747
2748 ret = register_virtio_driver(&virtio_net_driver);
2749 if (ret)
2750 goto err_virtio;
2751 return 0;
2752 err_virtio:
2753 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
2754 err_dead:
2755 cpuhp_remove_multi_state(virtionet_online);
2756 out:
2757 return ret;
2758 }
2759 module_init(virtio_net_driver_init);
2760
2761 static __exit void virtio_net_driver_exit(void)
2762 {
2763 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
2764 cpuhp_remove_multi_state(virtionet_online);
2765 unregister_virtio_driver(&virtio_net_driver);
2766 }
2767 module_exit(virtio_net_driver_exit);
2768
2769 MODULE_DEVICE_TABLE(virtio, id_table);
2770 MODULE_DESCRIPTION("Virtio network driver");
2771 MODULE_LICENSE("GPL");